<a href="https://colab.research.google.com/github/Parikshit-26/Data_Science_Project/blob/main/Data_Science_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

# Load the dataset.
# The CSV is read from Colab's /content directory; the path is built from
# `file_name` so the filename is written in exactly one place.
file_name = "Pune-D49-Financial_Health_2014-20.csv"
df = pd.read_csv(f"/content/{file_name}")

# Display initial information: the first rows, then the column/dtype summary.
print("Initial DataFrame Head:")
print(df.head())

print("\nInitial DataFrame Info:")
df.info()

Initial DataFrame Head:
  City Name     Year  Municipal Revenue Amount_inCrore  \
0      Pune  2019-20                           4446.68   
1      Pune  2018-19                           4391.06   
2      Pune  2017-18                           4306.59   
3      Pune  2016-17                           3728.48   
4      Pune  2015-16                           4037.33   

   Municipal Expenditure\nAmount_inCrore  Surplus/Deficit_inCrore  
0                                4461.51                   -14.83  
1                                4551.44                  -160.38  
2                                3902.84                   403.75  
3                                4089.31                  -360.83  
4                                3484.43                   552.90  

Initial DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 5 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Clean column names.
# Names are assigned by position, so this relies on the CSV keeping its five
# columns in the order shown by the initial df.head() output; pandas raises
# ValueError if the column count differs.
df.columns = [
    'City_Name',
    'Year',
    'Revenue_Crore',
    'Expenditure_Crore',
    'Surplus_Deficit_Crore'
]

# Keep the fiscal-year label as a plain string ('YYYY-YY').
df['Year'] = df['Year'].astype(str)

print("Cleaned DataFrame Head:")
print(df.head())

# The file lists the most recent fiscal year first. Matplotlib places string
# categories on the axis in order of first appearance, so plotting `df` as-is
# would draw the time axis backwards. 'YYYY-YY' labels sort chronologically
# as strings, so a sorted copy is used for the time-series plot only.
df_by_year = df.sort_values('Year')

# --- Plot 1: Line Plot (Time Series) ---
plt.figure(figsize=(10, 6))

# Plot Revenue and Expenditure
plt.plot(df_by_year['Year'], df_by_year['Revenue_Crore'], marker='o', label='Revenue')
plt.plot(df_by_year['Year'], df_by_year['Expenditure_Crore'], marker='s', label='Expenditure')
# Plot Surplus/Deficit: one bar per year, green for surplus (>= 0), red for deficit
bar_colors = np.where(df_by_year['Surplus_Deficit_Crore'] >= 0, 'g', 'r')
plt.bar(df_by_year['Year'], df_by_year['Surplus_Deficit_Crore'], color=bar_colors, alpha=0.5, label='Surplus (Green) / Deficit (Red)')

plt.title('Pune Municipal Financial Health (2014-15 to 2019-20)')
plt.xlabel('Fiscal Year')
plt.ylabel('Amount (₹ in Crore)')
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.legend()
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.savefig("financial_time_series_line_bar_plot.png")
plt.close()


# --- Plot 2: Scatter Plot (Revenue vs. Expenditure) ---
plt.figure(figsize=(7, 7))

# Centre the diverging colormap on zero so that deficits fall on the red side
# and surpluses on the green side, whatever the spread of the data.
# Fall back to 1.0 when every value is zero to avoid a zero-width colour range.
color_limit = float(df['Surplus_Deficit_Crore'].abs().max()) or 1.0
plt.scatter(
    df['Revenue_Crore'],
    df['Expenditure_Crore'],
    c=df['Surplus_Deficit_Crore'],
    cmap='RdYlGn',
    vmin=-color_limit,
    vmax=color_limit,
    s=200,
    alpha=0.8,
)

# Add a reference line (Y=X) where Revenue equals Expenditure (zero surplus/deficit).
# The line extends 100 crore beyond the smallest and largest plotted amounts.
min_val = min(df[['Revenue_Crore', 'Expenditure_Crore']].min()) - 100
max_val = max(df[['Revenue_Crore', 'Expenditure_Crore']].max()) + 100
plt.plot([min_val, max_val], [min_val, max_val], 'k--', alpha=0.5, label='Break-Even Line (Revenue = Expenditure)')

plt.colorbar(label='Surplus/Deficit (₹ in Crore)')
plt.title('Revenue vs. Expenditure Colored by Surplus/Deficit')
plt.xlabel('Revenue (₹ in Crore)')
plt.ylabel('Expenditure (₹ in Crore)')
plt.legend(loc='lower right')
plt.grid(linestyle='--', alpha=0.6)
plt.tight_layout()
plt.savefig("revenue_vs_expenditure_scatter_plot.png")
plt.close()


# --- Plot 3: Box Plot (Surplus/Deficit Distribution) ---
plt.figure(figsize=(4, 6))
plt.boxplot(df['Surplus_Deficit_Crore'])
plt.title('Distribution of Annual Surplus/Deficit')
plt.ylabel('Surplus/Deficit (₹ in Crore)')
plt.xticks([1], ['Pune']) # Label the single box plot
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.savefig("surplus_deficit_box_plot.png")
plt.close()

Cleaned DataFrame Head:
  City_Name     Year  Revenue_Crore  Expenditure_Crore  Surplus_Deficit_Crore
0      Pune  2019-20        4446.68            4461.51                 -14.83
1      Pune  2018-19        4391.06            4551.44                -160.38
2      Pune  2017-18        4306.59            3902.84                 403.75
3      Pune  2016-17        3728.48            4089.31                -360.83
4      Pune  2015-16        4037.33            3484.43                 552.90
