Assignment 1
The file ‘city_temperature.csv’ provides sample daily average temperature data for 5 cities across 3 months. Write a Python program that reads this data into NumPy arrays and processes it to meet the following requirements:

    1. A function that identifies the day on which the maximum temperature was recorded for each city. Return the result as a dict (city – temperature pairs).
    2. A function that computes the monthly average temperature for each city. Return the result as a dict.
    3. Treat the timeline as a 5-day sliding window. For each city, identify the number of 5-day stretches during which the temperature stayed above the monthly average.

In [96]:
import numpy as np
from collections import defaultdict
from datetime import datetime
import os
from dateutil.parser import parse
import calendar
from datetime import datetime, timedelta

In [87]:
def read_csv_city_temp(filename):
    """
    Load a city-temperature CSV into NumPy arrays.

    The first row is treated as the header: its first cell labels the date
    column and the remaining cells are city names. Every following row holds
    a date in ``DD-MM-YYYY`` format followed by one temperature per city.
    The file is assumed to contain at least one city column.

    Parameters
    ----------
    filename : str or path-like
        Location of the comma-separated file.

    Returns
    -------
    dates : ndarray of datetime, shape (n_days,)
        Parsed date of every data row, in file order.
    temp : ndarray of float, shape (n_days, n_cities)
        Temperatures, one column per city.
    cities : ndarray of str, shape (n_cities,)
        City names taken from the header, in column order.

    Raises
    ------
    ValueError
        If a date does not match ``DD-MM-YYYY`` or a temperature cell is not
        convertible to float.
    """
    # Read the file once as strings; the header row is split off below
    # instead of parsing the file a second time.
    raw = np.genfromtxt(filename, delimiter=',', dtype=str)
    # genfromtxt collapses a one-line result to 1-D; force (rows, columns) so
    # that files with a single data row (or only a header) index correctly.
    raw = np.atleast_2d(raw)
    header, data = raw[0], raw[1:]

    dates = np.array([datetime.strptime(row[0], "%d-%m-%Y") for row in data])
    temp = data[:, 1:].astype(float)
    cities = header[1:]
    return dates, temp, cities

In [88]:
def get_max_temp_per_city(dates,temperatures,cities):
    """
    Find, for each city, the day on which its highest temperature occurred.

    Parameters
    ----------
    dates : ndarray of datetime, shape (n_days,)
        Date of each row of `temperatures`.
    temperatures : ndarray of float, shape (n_days, n_cities)
        Daily temperatures, one column per city.
    cities : sequence of str
        City names in the same order as the columns of `temperatures`.

    Returns
    -------
    dict
        { city_name: (date_string, max_temperature) } where `date_string`
        is formatted as ``DD-MM-YYYY`` and `max_temperature` is a float.
        If the maximum occurs on several days, the earliest row wins.

    Raises
    ------
    ValueError
        If `temperatures` has no rows (argmax of an empty sequence).
    """
    # A single argmax along the day axis yields the peak row of every city.
    peak_rows = np.argmax(temperatures, axis=0)

    result = {}
    for col_index, city in enumerate(cities):
        row = peak_rows[col_index]
        peak_date = dates[row].strftime("%d-%m-%Y")
        # Plain float keeps the return type consistent with the monthly
        # averages, which are also converted from NumPy scalars.
        result[city] = (peak_date, float(temperatures[row, col_index]))
    return result

In [97]:
def get_monthly_avg_temp_per_city(dates,temperatures,cities):
    """
    Compute each city's mean temperature for every month present in `dates`.

    Rows are grouped by the month number of their date only (the year is
    not part of the grouping key).

    Parameters
    ----------
    dates : ndarray of datetime, shape (n_days,)
        Date of each row of `temperatures`.
    temperatures : ndarray of float, shape (n_days, n_cities)
        Daily temperatures, one column per city.
    cities : sequence of str
        City names in the same order as the columns of `temperatures`.

    Returns
    -------
    dict
        { city_name: { month_number: average } } with `month_number` an int
        and `average` a float rounded to two decimals.
    """
    month_of_day = np.array([day.month for day in dates])

    # One boolean row selector per distinct month, shared by all cities.
    selectors = [
        (int(month_no), month_of_day == month_no)
        for month_no in np.unique(month_of_day)
    ]

    averages = {}
    for column, name in enumerate(cities):
        averages[name] = {
            month_no: float(np.round(np.mean(temperatures[rows, column]), 2))
            for month_no, rows in selectors
        }
    return averages



In [98]:
def get_count_above_avg_five_days(dates, temps, cities, monthly_avgs=None, window=5):
    """
    Count, per city, the sliding windows of consecutive rows in which every
    day's temperature exceeds that city's average for the day's own month.

    Windows slide over row positions; this version does not check that the
    rows are calendar-consecutive. Note that a later cell in this file
    defines a calendar-aware function with the same name, which replaces
    this one when both cells are executed.

    Parameters
    ----------
    dates : ndarray of datetime.datetime, shape (n_days,)
    temps : ndarray of float, shape (n_days, n_cities)
    cities : sequence of str, length n_cities
    monthly_avgs : dict, optional
        { city_name: { month_number: avg_temp } }. Computed with
        `get_monthly_avg_temp_per_city` when omitted or empty.
    window : int, optional
        Number of consecutive rows per stretch (default 5).

    Returns
    -------
    dict
        { city_name: count_of_window-day_stretches }

    Raises
    ------
    ValueError
        If `window` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be at least 1")

    # An empty mapping carries no averages, so it is treated like "not given".
    if not monthly_avgs:
        monthly_avgs = get_monthly_avg_temp_per_city(dates, temps, cities)

    months = [date.month for date in dates]
    result = {}

    for city_index, city in enumerate(cities):
        city_avgs = monthly_avgs[city]
        # Threshold for every row: the average of the month that row falls in.
        thresholds = np.array([city_avgs[month] for month in months], dtype=float)
        above = temps[:, city_index] > thresholds

        # A window ending at a given row is fully "above" exactly when the
        # current run of above-average days is at least `window` long, so a
        # single pass counts every qualifying window.
        count = 0
        run_length = 0
        for is_above in above:
            run_length = run_length + 1 if is_above else 0
            if run_length >= window:
                count += 1

        result[city] = count

    return result


In [107]:
# Identify every calendar-consecutive stretch (5 days by default) for each city
# during which every single day's temperature exceeds that day's monthly average.
def get_count_above_avg_five_days(dates, temps, cities, monthly_avgs=None, window=5):
    """
    Count calendar-consecutive windows where every day's temperature is
    strictly greater than that day's monthly average.

    Design points:
    - Calendar-consecutive: a window is counted only if all of its calendar
      days are present in `dates` (no gaps or missing days).
    - Per-day monthly check: each day is judged against the average of its
      own month, so windows may span a month boundary.
    - Comprehensive: every date in `dates` is tried as a window start.

    Parameters
    ----------
    dates : ndarray of datetime.datetime, shape (n_days,)
    temps : ndarray of float, shape (n_days, n_cities)
    cities : sequence of str, length n_cities
    monthly_avgs : dict, optional
        { city_name: { month_number: avg_temp } }. Computed with
        `get_monthly_avg_temp_per_city` when omitted or empty.
    window : int, optional
        Number of calendar days per stretch (default 5).

    Returns
    -------
    dict
        { city_name: count_of_window-day_stretches }

    Raises
    ------
    ValueError
        If `window` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be at least 1")

    # An empty mapping carries no averages, so it is treated like "not given".
    if not monthly_avgs:
        monthly_avgs = get_monthly_avg_temp_per_city(dates, temps, cities)

    # Map each date -> its row index
    date_to_idx = {d: idx for idx, d in enumerate(dates)}

    # Which rows form a complete calendar window does not depend on the city,
    # so resolve the windows once instead of once per city.
    complete_windows = []
    for start_date in dates:
        window_dates = [start_date + timedelta(days=off) for off in range(window)]
        idxs = [date_to_idx.get(d) for d in window_dates]
        # Skip if any calendar day of the window is missing from the data
        if None in idxs:
            continue
        complete_windows.append((idxs, [d.month for d in window_dates]))

    result = {}
    for ci, city in enumerate(cities):
        city_avgs = monthly_avgs[city]
        result[city] = sum(
            1
            for idxs, window_months in complete_windows
            if all(
                temps[row, ci] > city_avgs[month]
                for row, month in zip(idxs, window_months)
            )
        )

    return result

In [109]:
csv_file_name="city_temperature.csv"
# Confirm the input CSV is present before attempting to parse it.
isFileExist=os.path.exists(csv_file_name)
if not isFileExist:
    # Nothing to process: report the missing path and stop here.
    print(f"Provided File Path does not exist at given location: {csv_file_name}")
else:
    print(f"provided csv file path {csv_file_name} exist!")

    # Load the dates, the temperature matrix and the city names.
    dates,temperatures,cities= read_csv_city_temp(csv_file_name)
    print(f"citites: {cities} ")
    print("=====" * 30)

    # Requirement 1: day of the maximum temperature for every city.
    print("(Q-1.1)-  Identify the day when max temperature recorded for each city\n")
    result_max_temp_per_city=get_max_temp_per_city(dates,temperatures,cities)
    print(result_max_temp_per_city)
    print("=====" * 30)

    # Requirement 2: monthly average temperature for every city.
    print("(Q-1.2)-  identify the monthly average temperature for each of the city. Return dict\n")
    result_monthly_avgs = get_monthly_avg_temp_per_city(dates,temperatures,cities)
    print(result_monthly_avgs)
    print("=====" * 30)

    # Requirement 3: count of 5-day stretches above the monthly average; the
    # averages computed for requirement 2 are passed in so they are reused.
    print("(Q-1.3)-  Consider the timeline into 5 days of sliding window. Identify for each city : the number of 5 day stretch(s) where temperature stayed more than that of monthly average. \n")
    result_count_above_avg_five_days = get_count_above_avg_five_days(dates,temperatures,cities,result_monthly_avgs)
    print(result_count_above_avg_five_days)
    print("=====" * 30)

provided csv file path city_temperature.csv exist!
citites: ['London' 'Tokyo' 'Sydney' 'Cairo' 'Rio'] 
(Q-1.1)-  Identify the day when max temperature recorded for each city

{'London': ('30-08-2024', 21.7), 'Tokyo': ('30-08-2024', 31.9), 'Sydney': ('28-08-2024', 16.0), 'Cairo': ('30-08-2024', 36.8), 'Rio': ('30-08-2024', 27.4)}
(Q-1.2)-  identify the monthly average temperature for each of the city. Return dict

{'London': {6: 17.44, 7: 19.61, 8: 20.59}, 'Tokyo': {6: 25.64, 7: 28.54, 8: 30.53}, 'Sydney': {6: 13.93, 7: 14.04, 8: 14.95}, 'Cairo': {6: 31.02, 7: 33.54, 8: 35.48}, 'Rio': {6: 22.52, 7: 24.27, 8: 26.08}}
(Q-1.3)-  Consider the timeline into 5 days of sliding window. Identify for each city : the number of 5 day stretch(s) where temperature stayed more than that of monthly average. 

{'London': 8, 'Tokyo': 29, 'Sydney': 5, 'Cairo': 25, 'Rio': 19}
