In [97]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

#Read file
df = pd.read_csv("venv/weatherbsas.csv")
df.head()

Unnamed: 0,STATION,NAME,DATE,TAVG,TMAX,TMIN
0,ARM00087582,"AEROPARQUE JORGE NEWBERY, AR",2002-01-01,25.3,,19.5
1,ARM00087582,"AEROPARQUE JORGE NEWBERY, AR",2002-01-02,25.0,30.5,22.0
2,ARM00087582,"AEROPARQUE JORGE NEWBERY, AR",2002-01-03,24.2,27.8,21.5
3,ARM00087582,"AEROPARQUE JORGE NEWBERY, AR",2002-01-04,22.9,27.0,
4,ARM00087582,"AEROPARQUE JORGE NEWBERY, AR",2002-01-05,18.4,24.9,17.0


In [98]:
#Clean file of unncecesary data
df.drop(columns=["STATION","TAVG"], inplace=True)


# Convert to datetime format
df["DATE"] = pd.to_datetime(df["DATE"])

#sort by date
df = df.sort_values(by="DATE")

#Drop leap days
df = df[~((df["DATE"].dt.month == 2) & (df["DATE"].dt.day == 29))]

#Index as date
df.set_index("DATE",inplace=True)
df.head(6)

Unnamed: 0_level_0,NAME,TMAX,TMIN
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2002-01-01,"AEROPARQUE JORGE NEWBERY, AR",,19.5
2002-01-01,"MINISTRO PISTARINI, AR",,
2002-01-01,"BUENOS AIRES OBSERV, AR",30.1,19.4
2002-01-02,"AEROPARQUE JORGE NEWBERY, AR",30.5,22.0
2002-01-02,"BUENOS AIRES OBSERV, AR",30.9,20.6
2002-01-02,"MINISTRO PISTARINI, AR",,18.5


In [99]:
#split tmin and max into series
df_max = df["TMAX"]
df_min = df["TMIN"]

#Groupby date
df_min = df_min.groupby(df.index.to_series().dt.date).min()
df_max = df_max.groupby(df.index.to_series().dt.date).max()

#Convert index to datetime format
df_max.index= pd.to_datetime(df_max.index)
df_min.index= pd.to_datetime(df_max.index)

len(df_min)

7663

In [100]:
#Break last two years
max_2020 = df_max[df_max.index.year >= 2020]
min_2020 = df_min[df_min.index.year >= 2020]
df_max = df_max[df_max.index.year < 2020]
df_min = df_min[df_min.index.year < 2020]
df_min

DATE
2002-01-01    19.4
2002-01-02    18.5
2002-01-03    17.4
2002-01-04    18.4
2002-01-05    16.6
              ... 
2019-12-27    19.9
2019-12-28    20.8
2019-12-29    22.4
2019-12-30     NaN
2019-12-31    20.5
Name: TMIN, Length: 6570, dtype: float64

In [101]:
#Change datetime format to day of year
max_2020.index = max_2020.index.to_series().apply(lambda x: x.strftime('%m-%d'))
min_2020.index = min_2020.index.to_series().apply(lambda x: x.strftime('%m-%d'))
df_min.index = df_min.index.to_series().apply(lambda x: x.strftime('%m-%d'))
df_max.index = df_max.index.to_series().apply(lambda x: x.strftime('%m-%d'))

#Group by day of year
record_min = df_min.groupby(df_min.index).min()
record_max = df_max.groupby(df_max.index).max()
min_2020 = min_2020.groupby(min_2020.index).min()
max_2020 = max_2020.groupby(max_2020.index).max()
max_2020

DATE
01-01    34.6
01-02    34.8
01-03    35.0
01-04    37.6
01-05    33.5
         ... 
12-27    35.0
12-28    32.1
12-29    34.0
12-30    40.1
12-31    38.1
Name: TMAX, Length: 365, dtype: float64

In [102]:
#2015 temps that break records
max_2020 = max_2020[max_2020 > record_max]
min_2020 = min_2020[min_2020 < record_min]

In [103]:
#2015 temps that break records
max_2020 = max_2020[max_2020 > record_max]
min_2020 = min_2020[min_2020 < record_min]

ValueError: Can only compare identically-labeled Series objects

In [None]:
def plot():

    #Create figure
    plt.figure(figsize=(40,20))

    #Plot line charts
    plt.plot(record_max.index, record_max, record_min.index, record_min,c="black",zorder=2)

    #Scatter plots
    #plt.scatter(max_2015.index, max_2015, s=80, c="red", label="Temperatura extrema alta",zorder=0)
    #plt.scatter(min_2015.index, min_2015, s=80, c="blue", label="Temperatura exrema baja",zorder=0)

    #Fill between
    plt.gca().fill_between(record_max.index, record_max,record_min, facecolor="lightgrey", alpha=0.25)

    #Set x axis in monthly intervals
    months = ['Enero', 'Febrero', 'Marzo', 'Abril', 'Mayo', 'Junio', 'Julio', 'Agosto', 'Septiembre', 'Octubre', 'Noviembre', 'Diciembre', "Enero"]
    plt.xticks(np.linspace(0,365,13), months)
    temps = ["-10° c","O° c", "10° c","20° c","30° c","40° c"]

    #Change y axis format to add "° c"
    plt.yticks(np.linspace(-10,40,6),temps)

    #Remove box
    for spine in plt.gca().spines.values():
        spine.set_visible(False)

    #Axis style
    plt.tick_params(axis ='x',
                   labelsize = 40, colors ='black',width=5, length=10, labelrotation=45)
    plt.tick_params(axis ='y',
                   labelsize = 30, colors ='black',width=5, length= 10,)

    #Title and legend
    plt.title("Temperaturas extremas en Buenos Aires. Periodo 2002-2022.", fontsize=80);
    #plt.legend(loc=4, frameon=False, title='', fontsize=40, title_fontsize=40);


    return plt.savefig('my_plot.png',bbox_inches='tight')

In [None]:
plot()