In [1]:
# 1_imports_and_constants.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

# Canonical row order for the climatology table: the twelve calendar months
# followed by a trailing 'Annual' summary row.
# load_table() uses this list as the ordered categories of the Month column,
# and the plotting / extremes helpers use MONTH_ORDER[:-1] to drop 'Annual'.
MONTH_ORDER = ["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec","Annual"]

In [2]:
# 2_load_table.py
def load_table(path):
    """
    Load a monthly climatology table and return it in a canonical layout.

    Parameters
    ----------
    path : str or Path
        Input file. ``.xlsx``/``.xls`` are read with ``pd.read_excel``;
        ``.csv``/``.txt`` are read with ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Exactly these columns, in this order:
        Month, Daily_Min_C, Daily_Max_C, Rainfall_mm, Rainy_Days,
        Hail, Thunder, Fog, Squall.
        ``Month`` is an ordered categorical following ``MONTH_ORDER`` and the
        rows are sorted by it; every other column is numeric (values that
        cannot be parsed become NaN). Expected columns missing from the input
        are created and filled with NaN. Month labels that are not in
        ``MONTH_ORDER`` after cleaning become missing values.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist.
    ValueError
        If the file extension is not one of the supported types.
    """
    p = Path(path)
    if not p.exists():
        raise FileNotFoundError(f"Input file not found: {p}")

    # Pick the reader from the file extension
    suffix = p.suffix.lower()
    if suffix in (".xls", ".xlsx"):
        df = pd.read_excel(p)
    elif suffix in (".csv", ".txt"):
        df = pd.read_csv(p)
    else:
        raise ValueError("Unsupported file type. Use .xlsx, .xls, .csv, or .txt")

    # Normalize header names. Headers are not guaranteed to be strings
    # (e.g. numeric column labels), so coerce before stripping.
    df.columns = [str(c).strip() for c in df.columns]

    expected = ["Month", "Daily_Min_C", "Daily_Max_C", "Rainfall_mm",
                "Rainy_Days", "Hail", "Thunder", "Fog", "Squall"]

    # Case-insensitive lookup; the first column wins when several headers
    # differ only by case.
    by_lower = {}
    for col in df.columns:
        by_lower.setdefault(col.lower(), col)
    col_map = {by_lower[e.lower()]: e for e in expected if e.lower() in by_lower}

    # Fall back to any header containing 'month' when there is no exact match
    if "Month" not in col_map.values():
        for col in df.columns:
            if "month" in col.lower():
                col_map[col] = "Month"
                break

    df = df.rename(columns=col_map)

    # Renaming can produce repeated labels (e.g. both 'Month' and 'MONTH' in
    # the input); keep the first so df[name] stays a Series.
    df = df.loc[:, ~df.columns.duplicated()]

    # Ensure all expected columns exist
    for e in expected:
        if e not in df.columns:
            df[e] = np.nan

    # Keep and order expected columns
    df = df[expected].copy()

    # Clean Month strings: trim, drop a trailing '.', title-case, and fold
    # full month names to the three-letter labels used in MONTH_ORDER.
    full_to_abbr = {
        "January": "Jan", "February": "Feb", "March": "Mar", "April": "Apr",
        "June": "Jun", "July": "Jul", "August": "Aug", "September": "Sep",
        "Sept": "Sep", "October": "Oct", "November": "Nov", "December": "Dec",
    }
    month = (
        df["Month"].astype(str)
        .str.strip()
        .str.replace(r"\.$", "", regex=True)
        .str.title()
        .replace(full_to_abbr)
    )
    df["Month"] = pd.Categorical(month, categories=MONTH_ORDER, ordered=True)

    # Stable sort keeps the input order of rows that share a month label
    df = df.sort_values("Month", kind="stable").reset_index(drop=True)

    # Convert every non-Month column to numeric; unparsable cells become NaN
    for c in expected[1:]:
        df[c] = pd.to_numeric(df[c], errors="coerce")

    return df

In [3]:
# 3_do_eda_and_save.py
def do_eda_and_save(df, out_dir):
    """
    Print a quick exploratory summary of ``df`` and save it as CSV.

    Prints the ``DataFrame.info()`` report, the transposed ``describe()``
    table and the per-column count of missing values, then writes the frame
    (without its index) to ``srinagar_climatology_cleaned.csv`` in ``out_dir``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to summarise and save.
    out_dir : str or Path
        Output folder; created (including parents) if it does not exist.

    Returns
    -------
    pathlib.Path
        Path of the CSV file that was written.
    """
    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    print("\nDataFrame info:")
    # DataFrame.info() writes its report itself and returns None, so it must
    # not be wrapped in print() (that would emit a stray "None" line).
    df.info()

    print("\nNumeric summary:")
    print(df.describe().T)

    print("\nMissing values (per column):")
    print(df.isnull().sum())

    cleaned_csv = out_dir / "srinagar_climatology_cleaned.csv"
    df.to_csv(cleaned_csv, index=False)
    print(f"\nSaved cleaned CSV: {cleaned_csv}")
    return cleaned_csv

In [4]:
# 4_plot_temperature.py
def plot_temperature(df, out_dir):
    """
    Plot mean daily minimum and maximum temperature per month and save a PNG.

    Only rows whose ``Month`` is one of the twelve calendar months
    (``MONTH_ORDER`` without the trailing 'Annual') are plotted.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with ``Month``, ``Daily_Max_C`` and ``Daily_Min_C`` columns.
    out_dir : str or Path
        Output folder; created (including parents) if it does not exist.

    Returns
    -------
    pathlib.Path or None
        Path of ``srinagar_temp_trend.png``, or None when there are no
        monthly rows to plot (nothing is written in that case).
    """
    out_dir = Path(out_dir)
    monthly = df[df["Month"].isin(MONTH_ORDER[:-1])].copy()
    if monthly.empty:
        print("No monthly rows found for temperature plotting.")
        return None

    # Create the folder here too, so the function works when called on its own
    out_dir.mkdir(parents=True, exist_ok=True)
    out_file = out_dir / "srinagar_temp_trend.png"

    month_labels = monthly["Month"].astype(str)
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.plot(month_labels, monthly["Daily_Max_C"], marker="o", label="Mean Daily Max (°C)")
        ax.plot(month_labels, monthly["Daily_Min_C"], marker="o", label="Mean Daily Min (°C)")
        ax.set_title("Srinagar — Mean Daily Min & Max Temperatures (1991–2020)")
        ax.set_xlabel("Month")
        ax.set_ylabel("Temperature (°C)")
        ax.grid(axis="y", alpha=0.3)
        ax.legend()
        fig.tight_layout()
        fig.savefig(out_file, dpi=300)
    finally:
        # Release the figure even if plotting or saving fails
        plt.close(fig)

    print(f"Saved temperature plot: {out_file}")
    return out_file

In [5]:
# 5_plot_rainfall.py
def plot_rainfall(df, out_dir):
    """
    Draw a bar chart of mean monthly rainfall and save it as a PNG.

    Only rows whose ``Month`` is one of the twelve calendar months
    (``MONTH_ORDER`` without the trailing 'Annual') are plotted.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with ``Month`` and ``Rainfall_mm`` columns.
    out_dir : str or Path
        Output folder; created (including parents) if it does not exist.

    Returns
    -------
    pathlib.Path or None
        Path of ``srinagar_rainfall_bar.png``, or None when there are no
        monthly rows to plot (nothing is written in that case).
    """
    out_dir = Path(out_dir)
    monthly = df[df["Month"].isin(MONTH_ORDER[:-1])].copy()
    if monthly.empty:
        print("No monthly rows found for rainfall plotting.")
        return None

    # Create the folder here too, so the function works when called on its own
    out_dir.mkdir(parents=True, exist_ok=True)
    out_file = out_dir / "srinagar_rainfall_bar.png"

    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.bar(monthly["Month"].astype(str), monthly["Rainfall_mm"])
        ax.set_title("Srinagar — Mean Monthly Rainfall (1991–2020)")
        ax.set_xlabel("Month")
        ax.set_ylabel("Rainfall (mm)")
        ax.grid(axis="y", alpha=0.3)
        fig.tight_layout()
        fig.savefig(out_file, dpi=300)
    finally:
        # Release the figure even if plotting or saving fails
        plt.close(fig)

    print(f"Saved rainfall plot: {out_file}")
    return out_file

In [6]:
# 6_find_extremes.py
def find_extremes(df, out_dir):
    """
    Find the hottest, coldest, wettest and driest month and save a summary.

    Only rows whose ``Month`` is one of the twelve calendar months
    (``MONTH_ORDER`` without the trailing 'Annual') are considered.
    "Hottest" is the row with the largest ``Daily_Max_C``, "Coldest" the
    smallest ``Daily_Min_C``, "Wettest"/"Driest" the largest/smallest
    ``Rainfall_mm``. A metric whose values are all missing is skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with ``Month``, ``Daily_Max_C``, ``Daily_Min_C`` and
        ``Rainfall_mm`` columns.
    out_dir : str or Path
        Output folder; created (including parents) if it does not exist.

    Returns
    -------
    pandas.DataFrame or None
        Frame with columns Type, Month, Metric, Value (also written to
        ``srinagar_extremes_summary.csv``), or None when there are no
        monthly rows (nothing is written in that case).
    """
    out_dir = Path(out_dir)
    # Fresh 0..n-1 index so the .loc lookups below always return scalars,
    # even if the incoming frame has repeated index labels.
    monthly = df[df["Month"].isin(MONTH_ORDER[:-1])].reset_index(drop=True)
    if monthly.empty:
        print("No monthly rows found for extremes calculation.")
        return None

    # Create the folder here too, so the function works when called on its own
    out_dir.mkdir(parents=True, exist_ok=True)

    # (label, column, function returning the index label of the extreme row)
    specs = [
        ("Hottest", "Daily_Max_C", pd.Series.idxmax),
        ("Coldest", "Daily_Min_C", pd.Series.idxmin),
        ("Wettest", "Rainfall_mm", pd.Series.idxmax),
        ("Driest", "Rainfall_mm", pd.Series.idxmin),
    ]

    results = []
    for label, metric, pick in specs:
        values = monthly[metric]
        # idxmax/idxmin have no meaningful answer for an all-missing column
        if values.isnull().all():
            continue
        row = pick(values)
        results.append((label, monthly.loc[row, "Month"], metric, monthly.loc[row, metric]))

    extremes_df = pd.DataFrame(results, columns=["Type","Month","Metric","Value"])
    out_csv = out_dir / "srinagar_extremes_summary.csv"
    extremes_df.to_csv(out_csv, index=False)
    print(f"Saved extremes summary: {out_csv}")
    print("\nExtremes:\n", extremes_df)
    return extremes_df

In [7]:
# 7_run_all.py
# Driver: runs the whole pipeline (load -> EDA + cleaned CSV -> plots -> extremes).
# The names bound here (input_path, out_dir, df, cleaned_csv) are module-level.
if __name__ == "__main__":
    input_path = "Srinagar Dataset.xlsx"   # input workbook; change to your file name if needed
    out_dir = Path(".")  # every output file is written to the current directory

    # 1. Load the table into the canonical column layout
    df = load_table(input_path)
    print("Loaded data:\n", df.head())

    # 2. Print the EDA summary and save the cleaned CSV (returns the CSV path)
    cleaned_csv = do_eda_and_save(df, out_dir)

    # 3. Save the temperature line plot and the rainfall bar chart as PNGs
    plot_temperature(df, out_dir)
    plot_rainfall(df, out_dir)

    # 4. Compute the extreme months and save the summary CSV
    find_extremes(df, out_dir)

    print("\nAll done. Check the current folder for CSV, PNGs, and summary.")

Loaded data:
   Month  Daily_Min_C  Daily_Max_C  Rainfall_mm  Rainy_Days  Hail  Thunder  \
0   Jan         -1.9          7.1         63.6         5.4   0.0      0.1   
1   Feb          0.7         10.5         85.0         6.0   0.0      0.5   
2   Mar          4.3         15.5        104.6         7.2   0.1      1.6   
3   Apr          7.9         20.6         91.8         7.0   0.3      4.3   
4   May         11.2         24.7         63.5         5.9   0.3      8.4   

   Fog  Squall  
0  1.1       0  
1  0.5       0  
2  0.0       0  
3  0.1       0  
4  0.1       0  

DataFrame info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13 entries, 0 to 12
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   Month        13 non-null     category
 1   Daily_Min_C  13 non-null     float64 
 2   Daily_Max_C  13 non-null     float64 
 3   Rainfall_mm  13 non-null     float64 
 4   Rainy_Days   13 non-null     float64 
 