In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Read the raw CSV into `df`; the path is resolved relative to the working directory.
file_path = "uttarakhand_environment_20years.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Summarise the dataset: column dtypes / non-null counts, followed by a short preview.
print("Dataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() -- doing so appends a stray "None" line.
df.info()
print("\nFirst 5 Rows:")
print(df.head())

In [4]:
# Parse the Date column into pandas datetimes and store it back on `df`.
# The conversion is unconditional: a missing 'Date' column raises KeyError.
date_values = pd.to_datetime(df['Date'])
df['Date'] = date_values

In [None]:
# Plot the temperature trend over time.
#
# Climatic_Conditions is text; the first number found in each entry is plotted
# as the temperature -- NOTE(review): confirm against the raw CSV that the first
# number really is the temperature in °C.
#
# The pattern keeps an optional fractional part and a leading minus sign, so
# "12.7" is no longer truncated to 12 and "-3" no longer becomes 3. The minus is
# only accepted when it is not glued to a preceding word character, so a range
# such as "15-25" still yields 15 and "AQI-150" still yields 150.
# Entries without any number become NaN instead of raising.
temperature_c = pd.to_numeric(
    df['Climatic_Conditions'].str.extract(r'((?:(?<!\w)-)?\d+(?:\.\d+)?)', expand=False),
    errors='coerce',
)

fig, ax = plt.subplots(figsize=(10, 5))
sns.lineplot(x=df['Date'], y=temperature_c, ax=ax)
ax.set_title("Temperature Trend Over Years")
ax.set_xlabel("Year")
ax.set_ylabel("Temperature (°C)")
# Argument-less plt.grid() toggles the grid, which would switch it OFF under a
# style that already enables it; request it explicitly instead.
ax.grid(True)
plt.show()

In [None]:
# Urbanization vs. Pollution Analysis: one point per row of `df`.
#
# Both columns are text; the first number in each entry is used.
# NOTE(review): confirm against the raw CSV that the first number is the
# urbanization percentage / AQI value respectively.
#
# The pattern keeps an optional fractional part so a value such as "12.5%" is
# read as 12.5 rather than truncated to 12. Entries without any number become
# NaN (and are simply not drawn) instead of raising.
number_pattern = r'(\d+(?:\.\d+)?)'
urbanization_pct = pd.to_numeric(
    df['Land_Use_Urbanization'].str.extract(number_pattern, expand=False),
    errors='coerce',
)
aqi_level = pd.to_numeric(
    df['Pollution_Environmental_Degradation'].str.extract(number_pattern, expand=False),
    errors='coerce',
)

fig, ax = plt.subplots(figsize=(10, 5))
sns.scatterplot(x=urbanization_pct, y=aqi_level, ax=ax)
ax.set_title("Urbanization vs Pollution Levels")
ax.set_xlabel("Urbanization (%)")
ax.set_ylabel("AQI Levels")
plt.show()

In [None]:
# Correlation Heatmap
def _first_number(column):
    """Return `column` in numeric form so it can enter the correlation matrix.

    * numeric columns are passed through unchanged;
    * datetime columns are reduced to their calendar year;
    * everything else is treated as text and the first number in each entry
      (optional sign, optional fractional part) is extracted; entries without
      a number become NaN.

    The `.str` accessor only works on string data, so applying it blindly to
    every column raises AttributeError once 'Date' has been parsed to datetime
    (or for any column read_csv already parsed as numeric).
    """
    if pd.api.types.is_numeric_dtype(column):
        return column
    if pd.api.types.is_datetime64_any_dtype(column):
        # NOTE(review): year is used as the numeric stand-in for a date; this is
        # what digit extraction yields for ISO-formatted date strings. Confirm
        # that year granularity is what the analysis wants.
        return column.dt.year
    extracted = column.astype(str).str.extract(r'((?:(?<!\w)-)?\d+(?:\.\d+)?)', expand=False)
    return pd.to_numeric(extracted, errors='coerce')


# 'Region' is excluded from the correlation matrix.
df_numeric = df.drop(columns=['Region']).apply(_first_number)

fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(df_numeric.corr(), annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
ax.set_title("Correlation Heatmap")
plt.show()

In [None]:
# Write `df` back to disk without the index column, then confirm on stdout.
# NOTE(review): the only change made to `df` in the visible cells is the Date
# parsing -- confirm that this is the intended "cleaned" output.
output_csv = "uttarakhand_environment_cleaned.csv"
df.to_csv(path_or_buf=output_csv, index=False)
print("Cleaned dataset saved as uttarakhand_environment_cleaned.csv")