In [1]:
import os

import numpy as np
import pandas as pd

In [9]:
# Location of the Santacruz (Mumbai) daily weather CSV.
# Set the MUMBAI_WEATHER_CSV environment variable to point at your own copy;
# when it is unset, the original author's local path is used as a fallback so
# the notebook behaves exactly as before on that machine.
file_path = os.environ.get(
    "MUMBAI_WEATHER_CSV",
    "C:\\Users\\orion\\OneDrive\\Downloads\\Mumbai_1990_2022_Santacruz.csv",
)
weather_data = pd.read_csv(file_path)  # raw frame, dates still plain strings
weather_data  # bare last expression so the notebook renders the raw table

Unnamed: 0,time,tavg,tmin,tmax,prcp
0,01-01-1990,23.2,17.0,,0.0
1,02-01-1990,22.2,16.5,29.9,0.0
2,03-01-1990,21.8,16.3,30.7,0.0
3,04-01-1990,25.4,17.9,31.8,0.0
4,05-01-1990,26.5,19.3,33.7,0.0
...,...,...,...,...,...
11889,21-07-2022,27.6,25.6,30.5,10.9
11890,22-07-2022,28.3,26.0,30.5,3.0
11891,23-07-2022,28.2,25.8,31.3,5.1
11892,24-07-2022,28.1,25.6,30.4,7.1


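#### Mapping Function

The map step reads the CSV and emits one `(year, tavg)` record per daily observation, dropping rows where the value is missing.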

In [10]:
def mapper(source=None):
    """Map step: turn the daily weather CSV into (year, tavg) records.

    Parameters
    ----------
    source : str, path-like or file-like, optional
        Anything ``pd.read_csv`` accepts. When omitted, the notebook-level
        ``file_path`` is used, so existing ``mapper()`` calls keep working.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``year`` (calendar year extracted from ``time``) and
        ``tavg``. Rows with a missing value in either column are dropped;
        the surviving rows keep their original row labels, so the index
        may contain gaps.
    """
    if source is None:
        source = file_path  # notebook-level path set in the data-loading cell
    daily = pd.read_csv(
        source,
        usecols=["time", "tavg"],  # the only columns this step needs
        parse_dates=["time"],
        dayfirst=True,  # dates in the file are written DD-MM-YYYY
    )
    daily["year"] = daily["time"].dt.year
    return daily[["year", "tavg"]].dropna()

# Run the map step. The reduce cell further down passes this `df` to reducer().
df = mapper()
df  # bare last expression so the notebook renders the mapped frame

Unnamed: 0,year,tavg
0,1990,23.2
1,1990,22.2
2,1990,21.8
3,1990,25.4
4,1990,26.5
...,...,...
11889,2022,27.6
11890,2022,28.3
11891,2022,28.2
11892,2022,28.1


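#### Reducing Function

The reduce step groups the mapped records by `year`, collecting each year's `tavg` values and computing their mean.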

In [11]:
def reducer(mapped_data):
    """Reduce step: aggregate mapped (year, tavg) records per year.

    Parameters
    ----------
    mapped_data : pandas.DataFrame
        Frame with at least the columns ``year`` and ``tavg``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``year`` with two columns: ``tavg_values`` (list of every
        ``tavg`` observed that year) and ``avg_tavg`` (their mean).
    """
    # Build the per-year grouping once and derive both columns from it.
    tavg_by_year = mapped_data.groupby("year")["tavg"]
    columns = {
        "tavg_values": tavg_by_year.apply(list),
        "avg_tavg": tavg_by_year.mean(),
    }
    return pd.DataFrame(columns)

# Run the reduce step on `df`, the mapped frame returned by mapper() above.
result_df = reducer(df)
result_df  # bare last expression so the notebook renders the per-year table

Unnamed: 0_level_0,tavg_values,avg_tavg
year,Unnamed: 1_level_1,Unnamed: 2_level_1
1990,"[23.2, 22.2, 21.8, 25.4, 26.5, 25.1, 26.0, 26....",27.076944
1991,"[18.4, 17.9, 18.8, 20.5, 22.2, 22.3, 22.0, 20....",26.933791
1992,"[22.8, 24.1, 23.6, 22.2, 23.7, 22.3, 20.3, 21....",27.109836
1993,"[24.9, 24.7, 24.1, 24.8, 25.7, 26.3, 23.9, 23....",27.175549
1994,"[25.6, 24.7, 23.9, 26.1, 25.1, 25.6, 26.3, 25....",26.939118
1995,"[22.1, 22.2, 21.3, 20.9, 21.8, 23.3, 22.7, 21....",27.254396
1996,"[23.9, 23.1, 23.6, 24.3, 24.2, 22.9, 22.3, 23....",27.636612
1997,"[25.1, 24.3, 24.1, 22.7, 23.1, 25.1, 24.6, 24....",27.657808
1998,"[27.6, 27.4, 28.7, 29.1, 26.3, 25.7, 25.7, 24....",27.746575
1999,"[25.6, 25.1, 24.4, 25.1, 23.5, 22.6, 21.3, 22....",27.651374


In [14]:
# End-to-end run: map -> reduce -> report the warmest and coolest years.
#
# This cell deliberately does not rebind `df`. The reduce cell above calls
# reducer(df) and expects the mapped frame; pointing `df` at the raw
# `weather_data` here would make a re-run of that cell fail on the missing
# "year" column.
print("Weather Data loaded")

mapped_data = mapper()
print("Mapped Data loaded")

reduced_result = reducer(mapped_data)
print("Reduced Data loaded")
print(reduced_result.head())

# idxmax / idxmin return the index label, i.e. the year, of the extreme mean.
hottest_year = reduced_result["avg_tavg"].idxmax()
coldest_year = reduced_result["avg_tavg"].idxmin()

print(f"Hottest Year: {hottest_year} with avg temp {reduced_result.loc[hottest_year, 'avg_tavg']:.2f}°C")
print(f"Coldest Year: {coldest_year} with avg temp {reduced_result.loc[coldest_year, 'avg_tavg']:.2f}°C")

Weather Data loaded
Mapped Data loaded
Reduced Data loaded
                                            tavg_values   avg_tavg
year                                                              
1990  [23.2, 22.2, 21.8, 25.4, 26.5, 25.1, 26.0, 26....  27.076944
1991  [18.4, 17.9, 18.8, 20.5, 22.2, 22.3, 22.0, 20....  26.933791
1992  [22.8, 24.1, 23.6, 22.2, 23.7, 22.3, 20.3, 21....  27.109836
1993  [24.9, 24.7, 24.1, 24.8, 25.7, 26.3, 23.9, 23....  27.175549
1994  [25.6, 24.7, 23.9, 26.1, 25.1, 25.6, 26.3, 25....  26.939118
Hottest Year: 2018 with avg temp 28.76°C
Coldest Year: 1991 with avg temp 26.93°C
