# Analysis of COVID-19 Cases in India
This notebook analyzes a dataset of COVID-19 cases in India to visualize trends and distributions using various plots.

## 1. Import, Explore, and Prepare Dataset

In [None]:

    import pandas as pd
    import matplotlib.pyplot as plt
    import seaborn as sns
    
    # Apply seaborn's white-grid look to every figure drawn below
    sns.set_style('whitegrid')

    # Read the raw case records from the CSV next to the notebook
    df = pd.read_csv('covid.csv')

    # --- Data Preparation ---
    # Parse the 'Date' strings into pandas datetime values
    df['Date'] = pd.to_datetime(df['Date'])

    # Active = Confirmed minus Recovered minus Deceased, row by row
    df['Active'] = df['Confirmed'].sub(df['Recovered']).sub(df['Deceased'])

    # Daily new cases: difference between consecutive 'Confirmed' values
    # within each state, after ordering rows by state and then by date.
    df = df.sort_values(['State', 'Date'])
    confirmed_delta = df.groupby('State')['Confirmed'].diff()
    # The first row of each state has no predecessor, so diff() yields NaN
    # there; fall back to that row's own 'Confirmed' value.
    df['Daily New Cases'] = confirmed_delta.fillna(df['Confirmed'])

    # Quick sanity check of the prepared frame
    print("Data prepared successfully. A few rows from the dataset:", df.head(), sep="\n")
    print("\nData Info:")
    df.info()
    

## 2. Bar Plot: Total Confirmed Cases by State

In [None]:

    # Per-state total = the largest 'Confirmed' value seen for that state,
    # ordered from most to fewest cases.
    # NOTE(review): this presumes 'Confirmed' is a cumulative count — confirm.
    total_cases_by_state = (
        df.groupby('State')['Confirmed']
        .max()
        .sort_values(ascending=False)
    )

    # Horizontal bar chart of the 15 states with the highest totals
    top_15 = total_cases_by_state.head(15)
    fig, ax = plt.subplots(figsize=(12, 8))
    sns.barplot(x=top_15.values, y=top_15.index, ax=ax)
    ax.set_title('Total Confirmed COVID-19 Cases by State (Top 15)')
    ax.set_xlabel('Total Confirmed Cases')
    ax.set_ylabel('State')
    plt.show()
    

## 3. Line Chart: Daily New Cases Over Time (All India)

In [None]:

    # Sum the per-state daily new cases on each date to get one value per day
    new_cases_grouped_by_date = df.groupby('Date')['Daily New Cases']
    daily_cases_india = new_cases_grouped_by_date.sum()

    # Time series of the summed daily new cases
    fig, ax = plt.subplots(figsize=(14, 7))
    sns.lineplot(x=daily_cases_india.index, y=daily_cases_india.values, ax=ax)
    ax.set_title('Daily New COVID-19 Cases in India Over Time')
    ax.set_xlabel('Date')
    ax.set_ylabel('Daily New Cases')
    plt.show()
    

## 4. Histogram: Distribution of Total Confirmed Cases

In [None]:

    # Histogram (15 bins, with a KDE overlay) of the per-state totals held in
    # total_cases_by_state, which the bar-plot cell computes.
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.histplot(total_cases_by_state, bins=15, kde=True, ax=ax)
    ax.set_title('Distribution of Total Confirmed Cases Across States')
    ax.set_xlabel('Total Confirmed Cases')
    ax.set_ylabel('Number of States')
    plt.show()
    

## 5. Box Plot: Active Cases by State

In [None]:

    # Restrict to the 7 states with the highest totals so the plot stays readable
    top_states = total_cases_by_state.index[:7]
    is_top_state = df['State'].isin(top_states)
    df_top_states = df.loc[is_top_state]

    # One box per state, summarising that state's 'Active' values across all rows
    fig, ax = plt.subplots(figsize=(15, 8))
    sns.boxplot(data=df_top_states, x='State', y='Active', ax=ax)
    ax.set_title('Distribution of Daily Active Cases for Top States')
    ax.set_xlabel('State')
    ax.set_ylabel('Active Cases')
    plt.show()
    

## 6. Pie Chart: Proportion of Total Cases by State

In [None]:

    # Keep the seven states with the highest totals as individual slices and
    # fold every remaining state into a single 'Other States' slice.
    top_7_states = total_cases_by_state.head(7)
    other_cases = total_cases_by_state.iloc[7:].sum()
    # Series.append was removed in pandas 2.0 and raises AttributeError there;
    # pd.concat builds the same combined Series and also works on older pandas.
    pie_data = pd.concat([top_7_states, pd.Series({'Other States': other_cases})])

    # Draw the pie chart
    plt.figure(figsize=(10, 10))
    plt.pie(pie_data, labels=pie_data.index, autopct='%1.1f%%', startangle=140)
    plt.title('Proportion of Total COVID-19 Cases by State')
    plt.axis('equal')  # Ensures the pie is circular
    plt.show()
    

## 7. Scatter Plot: Confirmed vs. Active Cases

In [None]:

    # For each state, pick the row whose 'Date' is the maximum for that state
    latest_row_labels = df.groupby('State')['Date'].idxmax()
    latest_data_per_state = df.loc[latest_row_labels]

    # Scatter of confirmed vs. active cases, one point per state, coloured by state
    fig, ax = plt.subplots(figsize=(12, 8))
    sns.scatterplot(
        data=latest_data_per_state,
        x='Confirmed',
        y='Active',
        hue='State',
        s=100,
        legend=False,
        ax=ax,
    )
    ax.set_title('Total Confirmed vs. Active Cases (Latest Data)')
    ax.set_xlabel('Total Confirmed Cases')
    ax.set_ylabel('Current Active Cases')

    # The legend is off, so label every fifth point (positions 0, 5, 10, ...)
    # with its state name to give some context without cluttering the plot.
    labelled_rows = latest_data_per_state.iloc[::5]
    for x_pos, y_pos, state_name in zip(
        labelled_rows['Confirmed'], labelled_rows['Active'], labelled_rows['State']
    ):
        ax.text(x_pos, y_pos, state_name, fontdict={'size': 8})
    plt.show()
    

## 8. Pair Plot of Key Metrics

In [None]:

    # Take the four numeric metrics from the one-row-per-state frame
    # (latest_data_per_state, built in the scatter-plot cell).
    metric_columns = ['Confirmed', 'Recovered', 'Deceased', 'Active']
    pairplot_data = latest_data_per_state.loc[:, metric_columns]

    # pairplot creates its own figure; the suptitle is then applied to that
    # current figure and nudged upward (y=1.02) so it clears the top row.
    sns.pairplot(pairplot_data)
    plt.suptitle('Pairwise Relationships of COVID-19 Metrics by State', y=1.02)
    plt.show()
    