In [2]:
import pandas as pd

# Build the COVID-19 table in the common schema shared with the other disease
# tables: "Nom" holds the disease name and "Location" replaces the source
# file's "Country/Region" column.
required_columns = ["Nom", "Location", "Confirmed", "Deaths", "Recovered",
                    "Active", "New cases", "New deaths", "New recovered"]

# Load the CSV file and harmonise the location column name.
# DataFrame.rename silently ignores labels that are not present, so no
# membership check is needed, and returning a new frame avoids mutating the
# freshly loaded data in place.
df = pd.read_csv("./country_wise_latest.csv").rename(
    columns={"Country/Region": "Location"}
)

# Drop every row that has a NaN in ANY column of the file (not only the
# required ones). The explicit .copy() makes clean_df an independent frame so
# the column assignments below cannot raise SettingWithCopyWarning.
clean_df = df.dropna().copy()

# Add each required column the file does not provide, filled with the
# sentinel value -1.
for column in required_columns:
    if column not in clean_df.columns:
        clean_df[column] = -1

# "Nom" is guaranteed to exist after the loop above; wherever it still holds
# the -1 sentinel, replace it with the disease name.
clean_df["Nom"] = clean_df["Nom"].replace(-1, "Coronavirus")

# Retain only the required columns, in the schema's order.
coronavirus_df = clean_df[required_columns]

# Display the cleaned DataFrame
coronavirus_df


Unnamed: 0,Nom,Location,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered
0,Coronavirus,Afghanistan,36263,1269,25198,9796,106,10,18
1,Coronavirus,Albania,4880,144,2745,1991,117,6,63
2,Coronavirus,Algeria,27973,1163,18837,7973,616,8,749
3,Coronavirus,Andorra,907,52,803,52,10,0,0
4,Coronavirus,Angola,950,41,242,667,18,1,0
...,...,...,...,...,...,...,...,...,...
182,Coronavirus,West Bank and Gaza,10621,78,3752,6791,152,2,0
183,Coronavirus,Western Sahara,10,1,8,1,0,0,0
184,Coronavirus,Yemen,1691,483,833,375,10,4,36
185,Coronavirus,Zambia,4552,140,2815,1597,71,1,465


In [3]:
import pandas as pd

# Build the monkeypox table in the common schema shared with the other disease
# tables: "Nom" holds the disease name and "Location" the country/region.
required_columns = ["Nom", "Location", "Confirmed", "Deaths", "Recovered",
                    "Active", "New cases", "New deaths", "New recovered"]

# Load the CSV file and map the source column names onto the schema names.
# DataFrame.rename silently ignores labels that are not present, and returning
# a new frame avoids mutating the freshly loaded data in place.
df = pd.read_csv("./owid-monkeypox-data.csv").rename(columns={
    "Country/Region": "Location",
    "location": "Location",  # Ensuring "location" is properly capitalized
    "new_cases": "New cases",
    "new_deaths": "New deaths",
    "total_deaths": "Deaths",
    "total_cases": "Confirmed"
})

# Drop every row that has a NaN in ANY column of the file (not only the
# required ones). The explicit .copy() makes clean_df an independent frame so
# the column assignments below cannot raise SettingWithCopyWarning.
# NOTE(review): rows are discarded even when the NaN sits in a column that is
# not retained below; confirm this is intended.
clean_df = df.dropna().copy()

# Add each required column the file does not provide, filled with the
# sentinel value -1.
for column in required_columns:
    if column not in clean_df.columns:
        clean_df[column] = -1

# "Nom" is guaranteed to exist after the loop above; wherever it still holds
# the -1 sentinel, replace it with the disease name "Monkeypox".
clean_df["Nom"] = clean_df["Nom"].replace(-1, "Monkeypox")

# Retain only the required columns, then collapse to one row per location by
# taking the maximum of every column. The groupby moves "Location" to the
# first column of the result.
# NOTE(review): for "New cases" / "New deaths" the maximum is the largest
# value seen for the location, not necessarily the most recent one; confirm
# this is the intended meaning.
mpox_df = clean_df[required_columns]
mpox_df = mpox_df.groupby("Location", as_index=False).max()


mpox_df


Unnamed: 0,Location,Nom,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered
0,Africa,Monkeypox,1612.0,19.0,-1,-1,112.0,5.0,-1
1,Andorra,Monkeypox,4.0,0.0,-1,-1,2.0,0.0,-1
2,Argentina,Monkeypox,1129.0,2.0,-1,-1,96.0,1.0,-1
3,Aruba,Monkeypox,3.0,0.0,-1,-1,1.0,0.0,-1
4,Asia,Monkeypox,673.0,1.0,-1,-1,26.0,1.0,-1
...,...,...,...,...,...,...,...,...,...
113,United States,Monkeypox,30154.0,42.0,-1,-1,1392.0,6.0,-1
114,Uruguay,Monkeypox,19.0,0.0,-1,-1,3.0,0.0,-1
115,Venezuela,Monkeypox,12.0,0.0,-1,-1,3.0,0.0,-1
116,Vietnam,Monkeypox,2.0,0.0,-1,-1,1.0,0.0,-1


In [4]:
# Stack the coronavirus and monkeypox tables, order the rows by "Location",
# and renumber the index from zero so it follows the sorted order.
merged_df = (
    pd.concat([coronavirus_df, mpox_df])
    .sort_values(by="Location")
    .reset_index(drop=True)
)

# Prepend a 1-based "Id" column as the first column of the table.
merged_df.insert(loc=0, column="Id", value=range(1, len(merged_df) + 1))

# Write the merged table to disk without the DataFrame index, then report
# where it was saved.
output_file = "./merged_covid_monkeypox_data.csv"
merged_df.to_csv(output_file, index=False)
print(f"Merged data saved to: {output_file}")

# Show the merged table as the cell's output.
merged_df


Merged data saved to: ./merged_covid_monkeypox_data.csv


Unnamed: 0,Id,Nom,Location,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered
0,1,Coronavirus,Afghanistan,36263.0,1269.0,25198,9796,106.0,10.0,18
1,2,Monkeypox,Africa,1612.0,19.0,-1,-1,112.0,5.0,-1
2,3,Coronavirus,Albania,4880.0,144.0,2745,1991,117.0,6.0,63
3,4,Coronavirus,Algeria,27973.0,1163.0,18837,7973,616.0,8.0,749
4,5,Monkeypox,Andorra,4.0,0.0,-1,-1,2.0,0.0,-1
...,...,...,...,...,...,...,...,...,...,...
300,301,Coronavirus,Western Sahara,10.0,1.0,8,1,0.0,0.0,0
301,302,Monkeypox,World,87376.0,140.0,-1,-1,1802.0,12.0,-1
302,303,Coronavirus,Yemen,1691.0,483.0,833,375,10.0,4.0,36
303,304,Coronavirus,Zambia,4552.0,140.0,2815,1597,71.0,1.0,465
