In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [11]:
# Load the dataset (replace with your file path); the path is resolved
# relative to the notebook's working directory.
df = pd.read_csv("covid_19_india.csv")

# Preview the first 5 rows
print(df.head())

# Check column names and data types.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() -- that would append a stray "None" line.
df.info()

   Sno        Date     Time State/UnionTerritory ConfirmedIndianNational  \
0    1  2020-01-30  6:00 PM               Kerala                       1   
1    2  2020-01-31  6:00 PM               Kerala                       1   
2    3  2020-02-01  6:00 PM               Kerala                       2   
3    4  2020-02-02  6:00 PM               Kerala                       3   
4    5  2020-02-03  6:00 PM               Kerala                       3   

  ConfirmedForeignNational  Cured  Deaths  Confirmed  
0                        0      0       0          1  
1                        0      0       0          1  
2                        0      0       0          2  
3                        0      0       0          3  
4                        0      0       0          3  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18110 entries, 0 to 18109
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----

In [12]:
# Parse the 'Date' strings (year-month-day) into proper datetime values
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')

# Build the national series in a single chain:
#   1. sum 'Confirmed' across all rows sharing the same date,
#   2. turn the date index back into a regular column,
#   3. order the rows chronologically,
#   4. discard any rows that still contain missing values.
national_df = (
    df.groupby('Date')['Confirmed']
    .sum()
    .reset_index()
    .sort_values('Date')
    .dropna()
)

# Show the first few aggregated rows
print(national_df.head())

        Date  Confirmed
0 2020-01-30          1
1 2020-01-31          1
2 2020-02-01          2
3 2020-02-02          3
4 2020-02-03          3


In [None]:
# Create a line plot of the national confirmed-case totals against date
plt.figure(figsize=(10, 6))  # Set plot size (width, height)
plt.plot(national_df['Date'], national_df['Confirmed'], label='Confirmed Cases', color='blue')

# Add labels and title
plt.xlabel('Date')
plt.ylabel('Number of Confirmed Cases')
plt.title('COVID-19 Confirmed Cases in India Over Time')

# Add legend and grid
plt.legend()
plt.grid(True)

# Rotate x-axis labels for readability
plt.xticks(rotation=45)

# Adjust layout to prevent label cutoff
plt.tight_layout()

# Save the plot as an image.
# savefig() does not create missing directories, so make sure the target
# folder exists first; otherwise a fresh checkout fails with FileNotFoundError.
plot_path = Path('plots/cases_over_time.png')
plot_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(plot_path)

# Display the plot (after saving, so the saved figure is not empty)
plt.show()

In [14]:
# Summarise the national confirmed-case series: its arithmetic mean and its
# largest value, taken over every date in national_df.
confirmed = national_df['Confirmed']
mean_cases = confirmed.mean()
max_cases = confirmed.max()

# Report both figures (the mean is shown with two decimal places)
print(f"Mean Confirmed Cases: {mean_cases:.2f}")
print(f"Maximum Confirmed Cases: {max_cases}")

Mean Confirmed Cases: 9735140.51
Maximum Confirmed Cases: 32036511
