# São Paulo Water Quality Charts by Beach / Collection Point

Charts of the Enterococcus measurements at every beach/collection point over time.

In [None]:
import datetime
import math
import warnings

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from matplotlib.lines import Line2D

from IPython.core.display import display, HTML

%matplotlib inline

warnings.filterwarnings('ignore')

In [None]:
# Load the beach water-quality measurements, parsing the 'Date' column as datetimes.
df = pd.read_csv(
    '/kaggle/input/sp-beaches-water-quality/sp_beaches.csv',
    parse_dates=['Date'],
)

In [None]:
def closest_sunday(date):
    """Return the Sunday nearest to *date*.

    Monday to Wednesday map back to the preceding Sunday, Thursday to
    Saturday map forward to the following Sunday, and a Sunday is returned
    unchanged. *date* must provide ``weekday()`` (Monday == 0) and support
    adding/subtracting a ``datetime.timedelta``.
    """
    # Number of days elapsed since the most recent Sunday (0 on a Sunday).
    days_past_sunday = (date.weekday() + 1) % 7

    if days_past_sunday == 0:
        return date

    if days_past_sunday <= 3:
        return date - datetime.timedelta(days=days_past_sunday)

    return date + datetime.timedelta(days=7 - days_past_sunday)

In [None]:
# Snap every sample date to its nearest Sunday so samples can be aligned on a weekly grid.
df['date_sunday'] = df['Date'].apply(closest_sunday)

In [None]:
# Build the index of every Sunday in the sampled period.
# Series.min()/max() skip missing dates (NaT); the builtins min()/max() compare
# element by element, so their result depends on where a NaT sits in the column.
first = df['date_sunday'].min()
last = df['date_sunday'].max()

# 'W-SUN' spells out the weekly-on-Sunday frequency explicitly; the lowercase
# 'w' alias is deprecated in recent pandas releases.
sundays = pd.date_range(start=first, end=last, freq='W-SUN')

In [None]:
# Sorted array of the distinct city names present in the data.
cities = np.unique(df['City'].to_numpy())

In [None]:
# Pad every beach with a placeholder row (no measurement) for each Sunday on
# which it has no sample, so that all beaches share the same weekly grid.
all_sundays = set(sundays)  # built once instead of once per beach
placeholder_frames = []

for city in cities:
    city_rows = df[df['City'] == city]

    for beach in np.unique(city_rows['Beach']):
        sampled = set(city_rows.loc[city_rows['Beach'] == beach, 'date_sunday'])

        # Sorted so the appended rows come out in a reproducible order.
        missing_dates = sorted(all_sundays - sampled)

        if not missing_dates:
            # Nothing to pad; also avoids concatenating an empty frame.
            continue

        # NaT / NaN (instead of a float NaN date and None) give the placeholder
        # columns a datetime and a float dtype, so concatenation does not depend
        # on pandas ignoring all-NA columns when picking the result dtype.
        # NOTE(review): assumes 'Enterococcus' is numeric in the CSV - confirm.
        placeholder_frames.append(pd.DataFrame({
            'City': city, 'Beach': beach, 'Date': pd.NaT,
            'Enterococcus': np.nan, 'date_sunday': missing_dates}))

# One concat at the end avoids re-copying the whole, growing frame per beach.
df = pd.concat([df, *placeholder_frames], ignore_index=True)

In [None]:
text_color = 'dimgrey'
plot_color = 'slategrey'
axis_color = 'lightgrey'

# Time window and tick positions are the same for every chart, so they are
# computed once here instead of once per subplot.
x_limits = (datetime.date(2012, 1, 1), datetime.date(2021, 1, 1))
# 'YS' (year start) replaces the deprecated 'AS' alias: one tick every two years.
x_ticks = pd.date_range(start='2012-01-01', end='2021-01-01', freq='2YS')
x_tick_labels = np.arange(2012, 2022, 2)

# One figure per city: a grid of small charts, one per beach, three per row.
for city in cities:

    display(HTML(f'<H3>{city}</H3>'))

    beaches = np.unique(df[df['City'] == city]['Beach'])

    # Round the number of rows up so every beach gets a cell.
    rows, cols = (len(beaches) + 2) // 3, 3
    fig, ax = plt.subplots(rows, cols, figsize=(14, rows * 2 + (rows - 1) * 0.4), sharex=True,
                           sharey=True, squeeze=False)

    fig.subplots_adjust(top=0.99, left=0.05, bottom=0.1, hspace=0.3, wspace=0.2)

    # Single y-axis caption for the whole figure, drawn along the left edge.
    fig.text(0, 1, 'Enterococcus (CFU/100ml)', rotation=90, color=text_color, size=9, va='top')

    # ax.flat walks the grid row by row, so position i matches beaches[i].
    for i, panel in enumerate(ax.flat):

        if i >= len(beaches):
            # Unused cell in the last row: hide its frame and both axes.
            for spine in panel.spines.values():
                spine.set_visible(False)

            panel.xaxis.set_visible(False)
            panel.yaxis.set_visible(False)
            continue

        beach = beaches[i]

        df_beach = df[(df['Beach'] == beach) & (df['City'] == city)].sort_values(['date_sunday'])

        # The line breaks wherever a value is missing; the small markers keep
        # isolated measurements visible where no line segment can be drawn.
        panel.plot(df_beach['date_sunday'], df_beach['Enterococcus'],
                   color=plot_color)

        panel.scatter(df_beach['date_sunday'], df_beach['Enterococcus'],
                      color=plot_color, marker='.', s=2)

        panel.set_title(beach, fontsize=9, color=text_color)

        panel.spines['top'].set_visible(False)
        panel.spines['right'].set_visible(False)
        panel.spines['left'].set_visible(False)
        panel.spines['bottom'].set_color(axis_color)

        panel.set_xlim(*x_limits)
        panel.set_ylim(0, 1000)

        panel.tick_params(bottom=False, left=False, labelcolor=text_color)

        panel.set_xticks(x_ticks)
        panel.set_xticklabels(x_tick_labels, ha='left')

    plt.show()

    

*_Scatter plot where the data is missing or not continuous._