# In [None]:
# -----------------------------------------------------------
# Weather Data Analysis – Full Assignment Solution (Jupyter)
# -----------------------------------------------------------

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

# ========================
# Task 1: Load CSV
# ========================

# Note: if your file is stored locally, put its filename here:
CSV_FILE = "weather.csv"     # <-- replace with your CSV name

# Make sure the folder that receives the plots, cleaned CSV and report exists.
os.makedirs("output", exist_ok=True)

# Load the raw weather data.
df = pd.read_csv(CSV_FILE)

print("=== HEAD ===")
display(df.head())

print("=== INFO ===")
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in display() only adds a stray "None" to the cell output.
df.info()

print("=== DESCRIBE ===")
display(df.describe(include="all"))

# ========================
# Task 2: Data Cleaning
# ========================

# 1. Convert date column to datetime
# Auto-detect the date column: the first column whose name contains "date"
# (case-insensitive). str() keeps non-string column labels from crashing.
date_col = next((c for c in df.columns if "date" in str(c).lower()), None)

# Fail with an explicit message instead of the opaque KeyError that
# df[None] would raise when no such column exists.
if date_col is None:
    raise ValueError(
        "Could not find a date column: no column name contains 'date'. "
        f"Available columns: {list(df.columns)}"
    )

df[date_col] = pd.to_datetime(df[date_col])

# 2. Select important columns
# Name fragments used to recognise the column holding each measurement.
possible_cols = {
    "temp": ["temp", "temperature", "mean temperature", "temp_mean_c"],
    "rain": ["rain", "precip", "precipitation", "rainfall", "precip_mm"],
    "humidity": ["humidity", "hum", "hum_mean"]
}


def find_col(keywords, columns=None):
    """Return the first column whose name contains any of *keywords*.

    Matching is a case-insensitive substring test. Columns are scanned in
    order, so the earliest matching column wins regardless of which keyword
    matched it.

    Args:
        keywords: Iterable of name fragments to look for.
        columns: Column labels to search. Defaults to the columns of the
            module-level ``df`` so existing ``find_col(keywords)`` calls
            keep working unchanged.

    Returns:
        The matching column label, or ``None`` when nothing matches.
    """
    if columns is None:
        columns = df.columns

    # Lower-case the keywords once instead of once per column.
    needles = [str(k).lower() for k in keywords]

    for col in columns:
        name = str(col).lower()  # str() tolerates non-string labels
        if any(needle in name for needle in needles):
            return col
    return None

temp_col = find_col(possible_cols["temp"])
rain_col = find_col(possible_cols["rain"])
hum_col  = find_col(possible_cols["humidity"])

print("Detected Columns:")
print("Temperature =", temp_col)
print("Rainfall    =", rain_col)
print("Humidity    =", hum_col)

# Stop with a readable message when a measurement column was not detected;
# otherwise the column selection below fails with an opaque KeyError on None.
undetected = [
    label
    for label, col in (
        ("temperature", temp_col),
        ("rainfall", rain_col),
        ("humidity", hum_col),
    )
    if col is None
]
if undetected:
    raise ValueError(
        "Could not detect column(s) for: " + ", ".join(undetected)
        + f". Available columns: {list(df.columns)}"
    )

# 3. Keep only required columns, in chronological order.
# Sorting happens BEFORE the forward-fill so that a missing temperature is
# filled from the previous day rather than from the previous row of the file.
clean_df = df[[date_col, temp_col, rain_col, hum_col]].copy().sort_values(date_col)

# 4. Handle missing values
# Temperature: carry the last known reading forward (.ffill() replaces the
# deprecated fillna(method="ffill")). Leading gaps stay NaN.
clean_df[temp_col] = clean_df[temp_col].ffill()
# Rainfall: non-numeric entries become NaN, and NaN is treated as "no rain".
clean_df[rain_col] = pd.to_numeric(clean_df[rain_col], errors="coerce").fillna(0)
# Humidity: fill gaps with the column median.
clean_df[hum_col]  = clean_df[hum_col].fillna(clean_df[hum_col].median())

# Set index
clean_df = clean_df.set_index(date_col)

print("Cleaned Data:")
display(clean_df.head())

# ================================
# Task 3: Statistical Analysis
# ================================

# Whole-dataset temperature statistics, computed with NumPy.
# NOTE: np.std defaults to the population standard deviation (ddof=0), while
# the pandas "std" aggregation used below defaults to the sample standard
# deviation (ddof=1), so the two are not directly comparable.
daily_mean = np.mean(clean_df[temp_col])
daily_min  = np.min(clean_df[temp_col])
daily_max  = np.max(clean_df[temp_col])
daily_std  = np.std(clean_df[temp_col])

print("\n=== DAILY STATISTICS (NUMPY) ===")
print("Mean Temperature =", daily_mean)
print("Min Temperature  =", daily_min)
print("Max Temperature  =", daily_max)
print("Std Deviation    =", daily_std)

# Monthly stats
# Offset objects are used instead of the "M" / "Y" string aliases: recent
# pandas releases deprecate those aliases in favour of "ME" / "YE", which
# older releases do not understand. MonthEnd() / YearEnd() produce the same
# month-end / year-end bins on every version.
monthly_stats = clean_df.resample(pd.offsets.MonthEnd()).agg({
    temp_col: ["mean", "min", "max", "std"],
    rain_col: ["sum", "mean"],
    hum_col:  ["mean"]
})

print("\n=== MONTHLY STATISTICS ===")
display(monthly_stats)

# Yearly stats
yearly_stats = clean_df.resample(pd.offsets.YearEnd()).agg({
    temp_col: ["mean", "min", "max", "std"]
})

print("\n=== YEARLY STATISTICS ===")
display(yearly_stats)

# ================================
# Task 4: Visualizations
# ================================

# ---- Line Chart: Daily Temperature ----
# Temperature against the date index, drawn through the Axes interface.
fig_daily, ax_daily = plt.subplots(figsize=(10, 4))
ax_daily.plot(clean_df.index, clean_df[temp_col])
ax_daily.set_title("Daily Temperature Trend")
ax_daily.set_xlabel("Date")
ax_daily.set_ylabel("Temperature")
fig_daily.tight_layout()
fig_daily.savefig("output/daily_temperature.png")
plt.show()

# ---- Bar Chart: Monthly Rainfall ----
# Total rainfall per calendar month. A MonthEnd offset object replaces the
# "M" string alias, which recent pandas releases deprecate in favour of "ME";
# the offset object yields the same month-end bins on every version.
monthly_rain = clean_df[rain_col].resample(pd.offsets.MonthEnd()).sum()

plt.figure(figsize=(10,4))
# Bars are labelled "YYYY-MM"; the labels are rotated so they do not overlap.
plt.bar(monthly_rain.index.strftime("%Y-%m"), monthly_rain.values)
plt.title("Monthly Rainfall Total")
plt.xticks(rotation=45)
plt.ylabel("Rainfall")
plt.tight_layout()
plt.savefig("output/monthly_rainfall.png")
plt.show()

# ---- Scatter Plot: Humidity vs Temperature ----
# One point per observation: temperature on x, humidity on y.
fig_scatter, ax_scatter = plt.subplots(figsize=(6, 5))
ax_scatter.scatter(clean_df[temp_col], clean_df[hum_col])
ax_scatter.set_title("Humidity vs Temperature")
ax_scatter.set_xlabel("Temperature")
ax_scatter.set_ylabel("Humidity")
fig_scatter.tight_layout()
fig_scatter.savefig("output/humidity_vs_temperature.png")
plt.show()

# ---- Combined Plot ----
# Two panels side by side: the temperature line on the left and the
# humidity/temperature scatter on the right.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 4))
trend_axis, scatter_axis = ax

# line plot
trend_axis.plot(clean_df.index, clean_df[temp_col])
trend_axis.set_title("Daily Temperature")

# scatter plot
scatter_axis.scatter(clean_df[temp_col], clean_df[hum_col])
scatter_axis.set_title("Humidity vs Temp")

fig.tight_layout()
fig.savefig("output/combined_plots.png")
plt.show()

# ================================
# Task 5: Grouping & Aggregation
# ================================

# Tag every observation with its calendar month number so the same month
# from different years falls into one group.
clean_df["month"] = clean_df.index.month

# Aggregation applied to each measurement within a month group.
month_aggregations = {
    temp_col: "mean",
    rain_col: "sum",
    hum_col: "mean",
}
monthly_group = clean_df.groupby("month").agg(month_aggregations)

print("\n=== GROUPED BY MONTH ===")
display(monthly_group)

# ================================
# Task 6: Export Results
# ================================

# Export cleaned CSV (written with the date index as the first column).
clean_df.to_csv("output/cleaned_weather.csv")
print("\nSaved: output/cleaned_weather.csv")

# Create markdown report
# The key statistics are the whole-dataset temperature figures computed in
# Task 3, formatted to two decimal places.
report = f"""
# Weather Data Analysis Report

## Overview
This report contains analysis of real-world weather data for climate awareness and sustainability.

## Key Statistics
- Mean Daily Temperature: {daily_mean:.2f}
- Minimum Temperature: {daily_min:.2f}
- Maximum Temperature: {daily_max:.2f}
- Std Deviation: {daily_std:.2f}

## Insights
- Trend shows variation in daily temperature across the year.
- Rainfall varies significantly month-to-month.
- Humidity and temperature show a visible relationship in the scatter plot.

## Files Generated
- Cleaned CSV (cleaned_weather.csv)
- Daily Temperature Line Plot
- Monthly Rainfall Bar Plot
- Humidity vs Temperature Scatter Plot
- Combined Plot

"""

# An explicit encoding keeps the report identical across platforms instead of
# depending on the locale's default text encoding.
with open("output/weather_report.md", "w", encoding="utf-8") as f:
    f.write(report)

print("\nSaved: output/weather_report.md")
print("\nAll tasks completed successfully!")