In [None]:
# Pandas-Based Data Analysis Project

In [2]:
import pandas as pd

# Sample snapshot: one entry per country, every row sharing the same date.
# Each keyword below becomes a column of the resulting table.
data = dict(
    location=[
        'USA', 'India', 'Brazil', 'Russia', 'UK',
        'France', 'Germany', 'Japan', 'Italy', 'Canada',
    ],
    date=['2022-12-31' for _ in range(10)],
    total_cases=[
        10_000_000, 9_000_000, 8_500_000, 7_000_000, 6_000_000,
        5_500_000, 5_000_000, 4_800_000, 4_700_000, 4_500_000,
    ],
    total_deaths=[
        200_000, 150_000, 180_000, 210_000, 120_000,
        110_000, 100_000, 95_000, 92_000, 88_000,
    ],
    people_vaccinated_per_hundred=[
        75.5, 68.3, 70.1, 60.4, 80.2,
        78.7, 74.3, 69.8, 72.1, 82.9,
    ],
)

# Turn the column dictionary into a DataFrame (columns keep insertion order)
df = pd.DataFrame(data=data)

# Print the whole table as plain text
print(df)


  location        date  total_cases  total_deaths  \
0      USA  2022-12-31     10000000        200000   
1    India  2022-12-31      9000000        150000   
2   Brazil  2022-12-31      8500000        180000   
3   Russia  2022-12-31      7000000        210000   
4       UK  2022-12-31      6000000        120000   
5   France  2022-12-31      5500000        110000   
6  Germany  2022-12-31      5000000        100000   
7    Japan  2022-12-31      4800000         95000   
8    Italy  2022-12-31      4700000         92000   
9   Canada  2022-12-31      4500000         88000   

   people_vaccinated_per_hundred  
0                           75.5  
1                           68.3  
2                           70.1  
3                           60.4  
4                           80.2  
5                           78.7  
6                           74.3  
7                           69.8  
8                           72.1  
9                           82.9  


In [3]:
# 1. Rank all countries by total cases, largest first
top_cases = df.sort_values('total_cases', ascending=False)
print("\nTop Countries by Total Cases:\n", top_cases.loc[:, ['location', 'total_cases']])

# 2. Rank all countries by total deaths, largest first
top_deaths = df.sort_values('total_deaths', ascending=False)
print("\nTop Countries by Total Deaths:\n", top_deaths.loc[:, ['location', 'total_deaths']])

# 3. Mortality rate = deaths as a percentage of cases.
#    Written back onto df as a new column (this mutates df in place).
df['mortality_rate'] = df['total_deaths'].div(df['total_cases']).mul(100)
print("\nMortality Rate by Country:\n", df.loc[:, ['location', 'mortality_rate']])

# 4. Keep the countries whose vaccination figure is strictly above 70 per hundred
high_vax = df.loc[df['people_vaccinated_per_hundred'].gt(70)]
print("\nCountries with >70% Vaccination:\n", high_vax.loc[:, ['location', 'people_vaccinated_per_hundred']])



Top Countries by Total Cases:
   location  total_cases
0      USA     10000000
1    India      9000000
2   Brazil      8500000
3   Russia      7000000
4       UK      6000000
5   France      5500000
6  Germany      5000000
7    Japan      4800000
8    Italy      4700000
9   Canada      4500000

Top Countries by Total Deaths:
   location  total_deaths
3   Russia        210000
0      USA        200000
2   Brazil        180000
1    India        150000
4       UK        120000
5   France        110000
6  Germany        100000
7    Japan         95000
8    Italy         92000
9   Canada         88000

Mortality Rate by Country:
   location  mortality_rate
0      USA        2.000000
1    India        1.666667
2   Brazil        2.117647
3   Russia        3.000000
4       UK        2.000000
5   France        2.000000
6  Germany        2.000000
7    Japan        1.979167
8    Italy        1.957447
9   Canada        1.955556

Countries with >70% Vaccination:
   location  people_vaccinated_per_h

In [5]:
# Select rows whose vaccination figure is strictly greater than 70 per hundred
high_vax = df.loc[lambda frame: frame['people_vaccinated_per_hundred'] > 70]
print("🔹 Countries with >70% Vaccination:\n", high_vax.loc[:, ['location', 'people_vaccinated_per_hundred']])


🔹 Countries with >70% Vaccination:
   location  people_vaccinated_per_hundred
0      USA                           75.5
2   Brazil                           70.1
4       UK                           80.2
5   France                           78.7
6  Germany                           74.3
8    Italy                           72.1
9   Canada                           82.9


In [4]:
# Sort by total cases (descending) and keep the first five rows
top_cases = df.sort_values('total_cases', ascending=False).iloc[:5]
print("🔹 Top 5 Countries by Total Cases:\n", top_cases.loc[:, ['location', 'total_cases']])


🔹 Top 5 Countries by Total Cases:
   location  total_cases
0      USA     10000000
1    India      9000000
2   Brazil      8500000
3   Russia      7000000
4       UK      6000000


In [6]:
def vax_category(percent, high=70, medium=50):
    """Classify a vaccination figure as "High", "Medium", "Low" or "Unknown".

    Parameters
    ----------
    percent : float
        People vaccinated per hundred. Missing values (None, NaN, pd.NA)
        are accepted and reported as "Unknown".
    high : float, default 70
        Inclusive lower bound for the "High" category.
    medium : float, default 50
        Inclusive lower bound for the "Medium" category.

    Returns
    -------
    str
        "High" if percent >= high, "Medium" if percent >= medium,
        "Low" otherwise, or "Unknown" when percent is missing.
    """
    # NaN compares False against every threshold, so without this guard a
    # missing value would fall through to "Low"; None / pd.NA would raise.
    if pd.isna(percent):
        return "Unknown"
    if percent >= high:
        return "High"
    if percent >= medium:
        return "Medium"
    return "Low"

# Label every row by passing its vaccination figure through vax_category;
# the labels are stored on df as a new 'vax_level' column.
df['vax_level'] = df['people_vaccinated_per_hundred'].map(vax_category)
print("🔹 Countries Grouped by Vaccination Level:\n", df.loc[:, ['location', 'people_vaccinated_per_hundred', 'vax_level']])


🔹 Countries Grouped by Vaccination Level:
   location  people_vaccinated_per_hundred vax_level
0      USA                           75.5      High
1    India                           68.3    Medium
2   Brazil                           70.1      High
3   Russia                           60.4    Medium
4       UK                           80.2      High
5   France                           78.7      High
6  Germany                           74.3      High
7    Japan                           69.8    Medium
8    Italy                           72.1      High
9   Canada                           82.9      High


In [7]:
# First and third quartiles of the mortality rate, and the spread between them
q1, q3 = (df['mortality_rate'].quantile(q) for q in (0.25, 0.75))
iqr = q3 - q1

# Fences sit 1.5 IQRs below Q1 and above Q3
lower_bound, upper_bound = q1 - 1.5 * iqr, q3 + 1.5 * iqr

# A row is an outlier when its rate lies strictly outside either fence
outliers = df.loc[
    df['mortality_rate'].lt(lower_bound) | df['mortality_rate'].gt(upper_bound)
]
print("🔹 Outlier Countries by Mortality Rate:\n", outliers.loc[:, ['location', 'mortality_rate']])


🔹 Outlier Countries by Mortality Rate:
   location  mortality_rate
1    India        1.666667
2   Brazil        2.117647
3   Russia        3.000000
