# Assignment One: Temperature Stats from CSV

---
### Importing main API and Data Source

In [1]:
import numpy as np
import pandas as pd
import calendar as cal
from datetime import datetime
import matplotlib.pyplot as plt
# import math
from scipy import stats

### Task (1)

In [2]:
# Task (1): calculate the overall average temperature.
DATA = "wetter.csv"

# Parse the whole CSV as floats (genfromtxt's default dtype), skipping the
# header row. NOTE(review): fields that are not numeric (e.g. a date column)
# presumably come back as NaN in this array — confirm against the file.
arr_total = np.genfromtxt(DATA, delimiter=',', skip_header=1)

# All rows of the third column (index 2, counting from 0); this is the column
# the rest of the notebook treats as the temperature reading.
data = arr_total[:, 2]

# nanmean instead of mean: a single missing reading would otherwise turn the
# yearly average into NaN, while the monthly averages (mean_temp_func) already
# ignore NaN values. Without missing readings both give the same result.
mean_temp_all = np.nanmean(data)

In [3]:
# Report the Task (1) result beneath a banner, emitted with a single print call.
print(
    "\n" + "-"*55,
    " TEMPERATURE STATISTICS",
    "-"*55 + "\n",
    f"# Task(1):   Average temp (all year):         {mean_temp_all:.2f}°C",
    sep="\n",
)


-------------------------------------------------------
 TEMPERATURE STATISTICS
-------------------------------------------------------

# Task(1):   Average temp (all year):         12.10°C


### Task (2) & (3)

In [None]:
# Task (2): calculate the average temperature for the month of July.

# Second pass over the CSV, this time with dtype=None so NumPy picks a type per
# column and the date column is kept as text; each row can then be indexed
# like a tuple (row[0] = date, row[2] = temperature).
arr = np.genfromtxt(
    DATA,
    delimiter=',',
    skip_header=1,
    dtype=None,
    encoding='utf-8',
)

# Creating function to easily access monthly averages by a given integer (function attribute).
def mean_temp_func(ls, month):
    """Return the mean temperature of all rows dated in the given month(s).

    Parameters
    ----------
    ls : iterable of rows
        Each row must be indexable. ``row[0]`` is the date as text with the
        two-digit month at positions 5-6 (e.g. ``"2020-07-15"``) and
        ``row[2]`` is the temperature. Rows whose first field is not a string
        are skipped.
    month : int, numeric str, or iterable of those
        Month number(s) to select. A single value and a collection of values
        are both accepted.

    Returns
    -------
    float
        Mean of the matching temperatures with NaN readings ignored, or NaN
        when no row matches.
    """
    # A bare int is not iterable, and iterating a string such as "12" would
    # split it into the months "1" and "2" — treat both as one single month.
    if isinstance(month, (str, bytes)) or not hasattr(month, "__iter__"):
        month = (month,)

    # Two-digit month codes as they appear inside the date strings.
    mm = {f"{int(m):02d}" for m in month}
    vals = [float(r[2]) for r in ls if isinstance(r[0], str) and r[0][5:7] in mm]
    return float(np.nanmean(vals)) if vals else np.nan

In [5]:
# TODO : Compare whether the months of July and May differ significantly in their average temperature.

def avrg_temp(M=None,raw=False):
    """Return the average temperature of one month, or of the whole data set.

    Parameters
    ----------
    M : int-like, optional
        Month number. Anything ``int()`` cannot convert (``None``, ``""``,
        non-numeric text, infinity, ...) selects the overall average.
    raw : bool, optional
        If true, return the number itself; otherwise return it formatted as
        a string such as ``"12.10°C"``.

    Notes
    -----
    Relies on the notebook globals ``mean_temp_all``, ``arr`` and
    ``mean_temp_func``.
    """
    try:
        m = {int(M)}
    except (TypeError, ValueError, OverflowError):
        # Not a single number -> fall back to the overall average.
        m = None

    if m is None:
        val = mean_temp_all                     # precomputed overall mean
    else:
        val = float(mean_temp_func(arr, m))     # mean of the selected month
    return val if raw else f"{val:.2f}°C"
    
def dlt_temp(M1,M2,raw=0):
    """Return the absolute difference between the averages of two selections.

    ``M1`` and ``M2`` are passed to ``avrg_temp`` unchanged. With a truthy
    ``raw`` the difference is returned as a float, otherwise as a formatted
    string such as ``"5.51°C"``.
    """
    first = avrg_temp(M1, True)
    second = avrg_temp(M2, True)
    gap = float(np.abs(first - second))
    if raw:
        return gap
    return f"{gap:.2f}°C"

In [6]:
def avrg_temp(M=None, raw=False):
    """Return the average temperature for the selected month(s).

    Parameters
    ----------
    M : None, number, str, or iterable, optional
        - ``None``, ``""``, ``"all"``, ``"year"``, or anything that yields no
          valid month number -> overall mean (``mean_temp_all``).
        - int / float / numeric string (NumPy scalars included) -> that
          month. Floats are truncated; values outside 1-12 are ignored.
        - any iterable (list, tuple, set, range, NumPy array, generator, ...),
          possibly nested -> every valid month number found inside.
    raw : bool, optional
        If true, return the number itself; otherwise return it formatted as
        a string such as ``"12.10°C"``.

    Returns
    -------
    float or str
        Mean temperature over all selected months together.

    Notes
    -----
    Relies on the notebook globals ``mean_temp_all``, ``arr`` and
    ``mean_temp_func``.
    """

    # Formats the return value as either a raw number or a string for text output.
    def fmt(value):
        return value if raw else f"{value:.2f}°C"

    # Yields every valid month number (1-12) found in obj.
    def normalize(obj):
        # bool is a subclass of int, but True/False are flags, not months;
        # bytes would otherwise be iterated as raw byte values.
        if obj is None or isinstance(obj, (bool, bytes, bytearray)):
            return

        if isinstance(obj, str):
            if not obj.strip().isdigit():
                return      # "", "all", "year" and any other non-numeric text
            is_scalar = True
        else:
            # np.integer / np.floating cover NumPy scalars, e.g. the elements
            # of an integer array, which are not instances of Python's int.
            is_scalar = isinstance(obj, (int, float, np.integer, np.floating))

        if is_scalar:
            try:
                value = int(obj)
            except (ValueError, OverflowError):
                return      # NaN, infinity, or digits int() cannot parse
            if 1 <= value <= 12:
                yield value
            return

        # Everything else: try to walk it as a (possibly nested) iterable.
        try:
            items = iter(obj)
        except TypeError:
            return
        for item in items:
            yield from normalize(item)

    # No valid month selected -> overall mean; otherwise mean of the selection.
    months = set(normalize(M))
    if not months:
        return fmt(float(mean_temp_all))
    return fmt(float(mean_temp_func(arr, months)))


In [7]:

# Compute and report the results of tasks (1), (2) and (3) in a single print call.
print(
    "\n" + "-"*55,
    " TEMPERATURE STATISTICS",
    "-"*55 + "\n",
    f"# Task(1):   Average temp (all year):         {avrg_temp()}",
    f"# Task(2):   Average temp (jul):              {avrg_temp(7)}",
    f"# Task(2):   Average temp (may):              {avrg_temp(5)}",
    f"# Task(3):   Difference temp (ΔT[may,jul]):   {dlt_temp(7,5)}",
    "-"*55 + "\n",
    f"# ###        Average temp (ΔT[may,jun,jul]):  {avrg_temp([5,6,7])}",
    sep="\n",
)


-------------------------------------------------------
 TEMPERATURE STATISTICS
-------------------------------------------------------

# Task(1):   Average temp (all year):         12.10°C
# Task(2):   Average temp (jul):              20.75°C
# Task(2):   Average temp (may):              15.24°C
# Task(3):   Difference temp (ΔT[may,jul]):   5.51°C
-------------------------------------------------------

# ###        Average temp (ΔT[may,jun,jul]):  18.15°C


In [8]:
# Convert month numbers (1-12) to their three-letter abbreviations.
def month_abbrv(*month):
    """Translate month numbers (1-12) into lowercase three-letter abbreviations.

    Parameters
    ----------
    *month : int
        Month numbers, given either as separate arguments
        (``month_abbrv(7, 12)``) or as one collection
        (``month_abbrv([7, 12])``).

    Returns
    -------
    list of str
        One abbreviation per month number, in the order given.

    Raises
    ------
    IndexError
        If a month number lies outside 1-12.
    """
    abbrv = ('jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec')

    # A single collection argument stands for the month numbers themselves.
    if len(month) == 1 and isinstance(month[0], (list, tuple, set, frozenset, range)):
        month = month[0]

    lsmstr = []
    for i in month:
        # Explicit range check: plain abbrv[i-1] would wrap around and map
        # 0 to 'dec' and -1 to 'nov' instead of failing.
        if not 1 <= i <= 12:
            raise IndexError(f"month number out of range (1-12): {i!r}")
        lsmstr.append(abbrv[i-1])

    return lsmstr   # list-month-string (lsmstr)

# range = (1,2,3,4,5)
# ms = month_abbrv(range)
# month_abbrv(1)
# print(ms)

In [9]:
def print_out(*month, type='avrg_temp', raw=False):
    """Print one labelled result line for the given month number(s).

    ``type="avrg_temp"`` prints the combined average over all given months.
    ``type="dlt_temp"`` prints the temperature difference; the months are
    unpacked into ``dlt_temp``, so exactly two must be given. Any other
    ``type`` prints nothing. ``raw`` is handed on to the called function.
    """
    # Label such as "Jul,Dec,Sep", built from the month abbreviations.
    mlist = ",".join(month_abbrv(month)).title()

    if type == "avrg_temp":
        shown = avrg_temp(month, raw)
        print(f"# ###         Average temp [{mlist}]:                    {shown}")
    elif type == "dlt_temp":
        shown = dlt_temp(*month, raw)
        print(f"# ###         Difference temp (ΔT[{mlist}]):             {shown}")

# Demo call with the default type: prints one line with the combined average
# temperature over July, December and September.
print_out(7,12,9)

# ###         Average temp [Jul,Dec,Sep]:                    14.74°C


In [None]:
"""
"t-statistics"
"p-value"
alpha=0.05
"""
# TODO : Compare whether the months of July and May differ significantly in their average temperature.

def monthly_samples(month):
    """Return all temperature readings of one month as a list of floats.

    Parameters
    ----------
    month : int-like
        Month number; matched as a two-digit code against positions 5-6 of
        each row's date string.

    Returns
    -------
    list of float
        Readings taken from the notebook global ``arr``. Rows without a
        string date and missing readings (``None``, ``""`` or NaN) are left
        out.
    """
    month_code = f"{int(month):02d}"
    samples = []
    for r in arr:
        if not isinstance(r[0], str) or r[0][5:7] != month_code:
            continue
        if r[2] in (None, ""):
            continue
        value = float(r[2])
        # A missing numeric field is NaN rather than None or "", and a single
        # NaN sample would turn the t-test result into NaN.
        if not np.isnan(value):
            samples.append(value)
    return samples


def welch_t_test(month_a, month_b, alpha=0.05):
    """Compare two months' temperatures using Welch's t-test.

    Parameters
    ----------
    month_a, month_b : int-like
        Month numbers whose readings are fetched with ``monthly_samples``.
    alpha : float, optional
        Significance level; the difference counts as significant when the
        p-value is below it.

    Returns
    -------
    dict of str
        Keys ``month_a``, ``month_b``, ``mean_a``, ``mean_b``, ``t_stat``,
        ``p_value``, ``alpha`` and ``decision``. Each value is a printable
        ``"<key>: <value>"`` line.
    """
    samples_a = monthly_samples(month_a)
    samples_b = monthly_samples(month_b)

    # equal_var=False selects Welch's variant, which does not assume that
    # both months have the same variance.
    t_stat, p_value = stats.ttest_ind(samples_a, samples_b, equal_var=False)

    if np.isnan(p_value):
        # NaN < alpha is False, which would otherwise be reported as "not
        # significant" although the test produced no usable result.
        decision = "'N/A' test could NOT be evaluated (p-value is NaN)!"
    elif p_value < alpha:
        decision = "'YES' difference IS statistically significant!"
    else:
        decision = "'NO' difference is NOT statistically significant!"

    return {
        "month_a": f"month_a: {month_a}",
        "month_b": f"month_b: {month_b}",
        "mean_a": f"mean_a: {np.mean(samples_a)}",
        "mean_b": f"mean_b: {np.mean(samples_b)}",
        "t_stat": f"t_stat: {t_stat}",
        "p_value": f"p_value: {p_value}",
        "alpha": f"alpha: {alpha}",
        "decision": f"decision: {decision}",
    }

# Run the test for May vs. July and print every line of the report.
result = welch_t_test(5, 7)
for line in result.values():
    print(line)

month_a: 5
month_b: 7
mean_a: 15.244362036290322
mean_b: 20.75114227125506
t_stat: -15.939995529946795
p_value: 2.2331067074039338e-46
alpha: 0.05
decision: 'YES' difference IS statistically significant!
5.10°C
3.88°C


In [11]:
# Banner for the temperature differences in two-month steps.
print(
    "\n" + "-"*55,
    " TEMPERATURE COMPARISON",
    "-"*55 + "\n",
    sep="\n",
)
# print_out(1,3,type="dlt_temp")
# print_out(2,4,type="dlt_temp")
# print_out(3,5,type="dlt_temp")
# print_out(4,6,type="dlt_temp")
# print_out(5,7,type="dlt_temp")
# print_out(5,8,type="dlt_temp")
# print_out(7,9,type="dlt_temp")
# print_out(8,10,type="dlt_temp")
# print_out(9,11,type="dlt_temp")
# print_out(10,12,type="dlt_temp")
# print_out(11,1,type="dlt_temp")
# print_out(12,2,type="dlt_temp")

# print(f"# Task(3.0):  Difference temp (ΔT[may,jul]):  {np.abs(mean_temp_func(arr,{5})-mean_temp_func(arr,{7})):.2f}°C\n")
# print(f"# Task(3.1):  Difference temp (ΔT[apr,jun]):  {np.abs(mean_temp_func(arr,{4})-mean_temp_func(arr,{6})):.2f}°C")
# print(f"# Task(3.2):  Difference temp (ΔT[mar,may]):  {np.abs(mean_temp_func(arr,{3})-mean_temp_func(arr,{5})):.2f}°C")
# print(f"# Task(3.3):  Difference temp (ΔT[feb,apr]):  {np.abs(mean_temp_func(arr,{2})-mean_temp_func(arr,{4})):.2f}°C")
# print(f"# Task(3.4):  Difference temp (ΔT[jan,mar]):  {np.abs(mean_temp_func(arr,{1})-mean_temp_func(arr,{3})):.2f}°C")
# print(f"# Task(3.5):  Difference temp (ΔT[dec,feb]):  {np.abs(mean_temp_func(arr,{12})-mean_temp_func(arr,{2})):.2f}°C")
# print(f"# Task(3.6):  Difference temp (ΔT[nov,jan]):  {np.abs(mean_temp_func(arr,{11})-mean_temp_func(arr,{1})):.2f}°C")
# print(f"# Task(3.7):  Difference temp (ΔT[okt,dec]):  {np.abs(mean_temp_func(arr,{10})-mean_temp_func(arr,{12})):.2f}°C")
# print(f"# Task(3.8):  Difference temp (ΔT[sep,nov]):  {np.abs(mean_temp_func(arr,{9})-mean_temp_func(arr,{11})):.2f}°C")
# print(f"# Task(3.9):  Difference temp (ΔT[aug,okt]):  {np.abs(mean_temp_func(arr,{8})-mean_temp_func(arr,{10})):.2f}°C")
# print(f"# Task(3.10): Difference temp (ΔT[jul,sep]):  {np.abs(mean_temp_func(arr,{7})-mean_temp_func(arr,{9})):.2f}°C")
# print(f"# Task(3.11): Difference temp (ΔT[jun,aug]):  {np.abs(mean_temp_func(arr,{6})-mean_temp_func(arr,{8})):.2f}°C\n")

# Difference between each month and the month two steps later, wrapping
# around the end of the year (Nov -> Jan, Dec -> Feb).
numbers = list(range(12))
for i in numbers:
    m_a = i + 1
    m_b = (i + 2) % 12 + 1
    print_out(m_a, m_b, type="dlt_temp")

print("-"*55 + "\n")

# Average temperature of every single month.
for i in numbers:
    print_out(i + 1)


-------------------------------------------------------
 TEMPERATURE COMPARISON
-------------------------------------------------------

# ###         Difference temp (ΔT[Jan,Mar]):             4.50°C
# ###         Difference temp (ΔT[Feb,Apr]):             6.80°C
# ###         Difference temp (ΔT[Mar,May]):             7.88°C
# ###         Difference temp (ΔT[Apr,Jun]):             7.80°C
# ###         Difference temp (ΔT[May,Jul]):             5.51°C
# ###         Difference temp (ΔT[Jun,Aug]):             1.80°C
# ###         Difference temp (ΔT[Jul,Sep]):             3.81°C
# ###         Difference temp (ΔT[Aug,Oct]):             7.67°C


# ###         Difference temp (ΔT[Sep,Nov]):             9.55°C
# ###         Difference temp (ΔT[Oct,Dec]):             7.52°C
# ###         Difference temp (ΔT[Nov,Jan]):             4.53°C
# ###         Difference temp (ΔT[Dec,Feb]):             1.22°C
-------------------------------------------------------

# ###         Average temp [Jan]:                    2.86°C
# ###         Average temp [Feb]:                    3.88°C
# ###         Average temp [Mar]:                    7.36°C
# ###         Average temp [Apr]:                    10.68°C
# ###         Average temp [May]:                    15.24°C
# ###         Average temp [Jun]:                    18.48°C
# ###         Average temp [Jul]:                    20.75°C
# ###         Average temp [Aug]:                    20.28°C
# ###         Average temp [Sep]:                    16.94°C
# ###         Average temp [Oct]:                    12.61°C
# ###         Average temp [Nov]:                    7.40°C
# ###         Averag