In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime as dt

In [None]:
# Location of the source CSV. The original absolute path stays as the default so
# existing runs are unaffected; set the TEMPERATURE_CSV_PATH environment variable
# to point the notebook at a copy of the file on another machine.
data = os.environ.get(
    "TEMPERATURE_CSV_PATH",
    "C:\\Users\\IITA-GISguest2\\Downloads\\GIS\\data\\1990-2023.csv",
)

In [None]:
# Read the CSV referenced by `data` into a DataFrame; the bare name on the last
# line makes the notebook render the loaded frame.
temp = pd.read_csv(data)
temp

In [None]:
# Split the raw frame into two two-column frames, each pairing "Date" with one
# of the temperature columns, then display the maximum-temperature frame.
max_temp = temp[["Date", "Maximum Temperature"]]
min_temp = temp[["Date", "Minimum Temperature"]]
max_temp

In [None]:
# Tally the distinct null / non-null row patterns of each frame; the two tallies
# are shown together as a tuple (isna is an alias of isnull).
(
    max_temp.isna().value_counts(),
    min_temp.isna().value_counts(),
)

In [None]:
# Count the rows where exactly one of the two frames has a missing value.
#
# The masks are compared row by row rather than by zipping the two arrays of
# null positions: zip() stops at the shorter array (silently ignoring unmatched
# nulls) and pairs positions by rank, so a single extra null in one frame would
# make every later pair look "misaligned".
max_missing = max_temp.isnull().any(axis=1)
min_missing = min_temp.isnull().any(axis=1)
count = int((max_missing != min_missing).sum())

print("Number of Misaligned Values: ", count)

In [None]:
# Convert the "Date" column to pandas datetimes in place so it can later be used
# as a datetime index.
temp["Date"] = pd.to_datetime(temp["Date"])

In [None]:
# Rebuild the two single-variable frames, this time indexed by "Date" and with
# the short column names Tmax / Tmin. Each chain yields a new frame, so `temp`
# itself is left untouched.
max_temp = (
    temp[["Date", "Maximum Temperature"]]
    .set_index("Date")
    .rename(columns={"Maximum Temperature": "Tmax"})
)
min_temp = (
    temp[["Date", "Minimum Temperature"]]
    .set_index("Date")
    .rename(columns={"Minimum Temperature": "Tmin"})
)

In [None]:
# Inner-join the Tmax and Tmin frames on their shared "Date" index level.
temps = max_temp.merge(min_temp, how="inner", on="Date")


def avg_temp(row):
    """Return the midpoint of ``row.Tmax`` and ``row.Tmin``.

    ``row`` only needs to expose ``Tmax`` and ``Tmin`` attributes; the result is
    ``(Tmax + Tmin) / 2``.
    """
    high, low = row.Tmax, row.Tmin
    return (high + low) / 2


# avg_temp only does attribute access and arithmetic, so it can be applied to
# the whole frame at once: the Tmax and Tmin columns are combined element-wise,
# giving the same values as a row-by-row apply without the per-row Python call.
temps["T"] = avg_temp(temps)

temps

In [None]:
# Summary statistics for the Tmax, Tmin and T columns.
temps.describe()

1. Dry season: November (11) through March (3)
2. Wet season: April (4) through October (10)

In [None]:
# Work on a copy of `temps` that carries the calendar month of each row and a
# season label: months 4 through 10 (inclusive) are "Wet", all others "Dry".
temp_season = temps.assign(
    month=temps.index.month,
    season=lambda frame: np.where(frame["month"].between(4, 10), "Wet", "Dry"),
)
temp_season

In [None]:
# Line plot of every column in `temps` over the full index range.
temps.plot(figsize=(8, 7))
plt.show()

In [None]:
# Same plot restricted to the last 5000 rows of `temps`.
temps.tail(5000).plot(figsize=(8, 7))
plt.show()

In [None]:
# Overlay 60-bin histograms of Tmax, Tmin and T on one figure; each histogram is
# labelled with its column name for the legend.
plt.figure(figsize=(8, 6))
for column in ("Tmax", "Tmin", "T"):
    temps[column].hist(bins=60, alpha=0.6, label=column)
plt.legend()
plt.show()

In [None]:
# Overlay 60-bin histograms of T for the rows labelled "Dry" and "Wet".
plt.figure(figsize=(8, 6))
for season in ("Dry", "Wet"):
    in_season = temp_season["season"] == season
    temp_season.loc[in_season, "T"].hist(bins=60, alpha=0.8, label=season)
plt.legend()
plt.show()

Temperature Records

In [None]:
# For every calendar month (bins anchored at month start, "MS") record the first
# and last timestamp present in `temps`, plus the month number of the bin.
date_list = temps.index.tolist()
mth_temps = (
    pd.Series(date_list, index=date_list)
    .resample("MS")
    .agg(["min", "max"])
    .assign(month=lambda bounds: bounds.index.month)
)


def min_max_temps(row):
    """Attach the extremes of Tmax, Tmin and T for one date window to ``row``.

    The window is the inclusive range ``row["min"]`` .. ``row["max"]`` matched
    against the index of the module-level ``temps`` frame. Six entries are
    written onto ``row`` (``Tmax_max``, ``Tmax_min``, ``Tmin_max``,
    ``Tmin_min``, ``T_max``, ``T_min``) and the same, mutated ``row`` is
    returned.
    """
    in_window = (temps.index >= row["min"]) & (temps.index <= row["max"])
    extremes = temps[in_window].agg(["min", "max"])
    # Keep the column-then-stat order so the new entries are appended in the
    # same sequence as before.
    for column in ("Tmax", "Tmin", "T"):
        for stat in ("max", "min"):
            row[f"{column}_{stat}"] = extremes.loc[stat, column]
    return row


# Run min_max_temps on every monthly row (axis=1) so each row gains the six
# extreme-value columns, then display the result.
mth_temps = mth_temps.apply(min_max_temps, axis=1)
mth_temps

Temperature Extremes

In [None]:
# For each calendar month (1-12), take the lowest and highest value ever seen of
# each monthly extreme column.
extreme_cols = ["Tmax_max", "Tmax_min", "Tmin_max", "Tmin_min", "T_max", "T_min"]
grouped_mths = mth_temps.groupby(mth_temps.month)[extreme_cols].agg(["min", "max"])

month_names = [
    "Jan",
    "Feb",
    "Mar",
    "Apr",
    "May",
    "Jun",
    "Jul",
    "Aug",
    "Sep",
    "Oct",
    "Nov",
    "Dec",
]

# Label each group from its own month number instead of assigning a fixed list
# of twelve names by position: the positional form raises a length-mismatch
# error whenever a calendar month is absent from the data.
grouped_mths.index = pd.Index(
    [month_names[int(month) - 1] for month in grouped_mths.index], name="months"
)

# Show the record high, record low, lowest daily maximum and highest daily minimum.
grouped_mths[
    [("Tmax_max", "max"), ("Tmin_min", "min"), ("Tmax_min", "min"), ("Tmin_max", "max")]
]

Decompose Time-Series Components

In [None]:
from statsmodels.graphics.api import qqplot
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.ar_model import AutoReg, ar_select_order, AutoRegResults

In [None]:
# Sort `temps` by its index in place (this mutates the existing frame), then
# display it.
temps.sort_index(inplace=True)
temps

In [None]:
# Rolling mean of T over a window of 365 * 10 rows. The title previously said
# "annual periods", which did not match this window length; it now describes the
# window that is actually computed.
# NOTE(review): the "10-year" wording assumes one row per day — confirm. If a
# one-year window was intended instead, change the window rather than the title.
temps["T"].rolling(window=365 * 10).mean().plot(
    figsize=(8, 6), color="tab:red", title="Rolling mean over a 10-year window"
)