# 1. Libraries Import

In [None]:
# ========================================================
# = Libraries import
# ========================================================

from collections import defaultdict
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import boto3
import warnings
warnings.filterwarnings('ignore')
import seaborn as sns
import plotly.express as px

import geoplot
import geopandas

# 2. AWS credentials

In [None]:
# ========================================================
# = AWS Credentials
# ========================================================

# Named AWS profile and region used to build the Timestream query client.
PROD_AWS_PROFILE = "gsesami-prod"
AWS_REGION = "us-west-2"

# boto3.Session is the same class as boto3.session.Session.
prod_session = boto3.Session(profile_name=PROD_AWS_PROFILE)

# Query client bound to the session's credentials and the region above.
prod_client = prod_session.client(
    service_name="timestream-query",
    region_name=AWS_REGION,
)

# 3. Getting Monitors from Sites

## 3.1. Reading all monitors from CSV

In [None]:
# Read the list of all monitors in the database from its CSV export.
df_monitors = pd.read_csv('input_data/monitors/Monitors_List.csv')

In [None]:
# Print a summary of the monitor table (columns, dtypes, non-null counts).
df_monitors.info()

## 3.2. Getting faulty sites

In [None]:
# Load the table of sites with faults from its CSV file.
faulty_sites = pd.read_csv(
    './faulty_sites/window_size_7days/SiteIDs_faults.csv'
)

# Keep only the column holding the site identifiers.
faulty_sites_list_of_IDs = faulty_sites.loc[:, 'site_id']

In [None]:
# TODO: double-check this.
# Distinct site identifiers present in the monitor table.
sites = df_monitors['siteId'].unique()

In [None]:

# Number of distinct faulty site IDs (missing values are counted too,
# exactly as len(Series.unique()) would count them).
faulty_sites_list_of_IDs.nunique(dropna=False)

# 4. Initial Analysis on Monitors

## 4.1. Plotting monitors per Site (all sites and monitors)

In [None]:
# Number of monitors attached to each site, considering ALL monitors.
# groupby(...).size() counts the rows per siteId directly, so there is no
# need to overwrite the 'source' column of a slice with 1 and sum it.
# The result keeps the same layout as before: index 'siteId' and a single
# integer column named 'source'.
df_monitorsPerSite_plot = (
    df_monitors.groupby('siteId')
    .size()
    .to_frame('source')
)

In [None]:
sns.set_theme()

# 10x10 inch figure holding a single axes for the bar chart.
ax = plt.figure(figsize=(10, 10)).add_subplot()

# One bar per distinct monitors-per-site value; bar height = number of sites.
sns.countplot(x=df_monitorsPerSite_plot['source'], palette='flare', ax=ax)

# Bar labels are intentionally left disabled for this plot:
# for container in ax.containers:
#     ax.bar_label(container)

ax.set(
    title='Count of monitors per site (all monitors)',
    xlabel='Monitors per Site',
    ylabel='Count of Sites',
)

plt.show()

## 4.2. Filtering monitors that belong to faulty SiteIDs

In [None]:
# Keep only the monitors whose SiteID appears in the faulty-sites list.
monitors_faulty_sites = df_monitors.loc[
    df_monitors['siteId'].isin(faulty_sites_list_of_IDs)
]

In [None]:
# Save the monitors of faulty sites as a CSV.
# index=False keeps the DataFrame's row index out of the file, so reading
# the file back does not produce a spurious 'Unnamed: 0' column.
monitors_faulty_sites.to_csv(
    './faulty_sites/window_size_7days/monitors_faulty_sites.csv',
    index=False,
)

In [None]:
# Count the faulty-site monitors per distinct 'manufacturerApi' value.
monitors_faulty_sites['manufacturerApi'].value_counts()

## 4.3. Filtering by monitors with API integration

In [None]:
# Currently only FIMER and SMA have API integrations
api_integrated = ['FIMER','SMA']

In [None]:
# Split out the monitors of each API-integrated manufacturer and report
# how many rows each one has.
fimer_monitors = df_monitors.loc[df_monitors['manufacturerApi'].eq("FIMER")]
sma_monitors = df_monitors.loc[df_monitors['manufacturerApi'].eq("SMA")]
print(f"The amount of FIMER monitors is: {len(fimer_monitors)}")
print(f"The amount of SMA monitors is: {len(sma_monitors)}")

In [None]:
# Monitors whose 'manufacturerApi' is one of the API-integrated manufacturers,
# followed by a report of that subset's size against the full table.
monitors_integrated = df_monitors.loc[
    df_monitors['manufacturerApi'].isin(api_integrated)
]
print(f"The amount of total monitors with API integration is: {len(monitors_integrated)}")
print(f"This is out of a total of: {len(df_monitors)}")

## 4.5. Counting monitors in each site

In [None]:
# Two-column working frame for the per-site count.
# An explicit copy is taken because the 'source' column of this frame is
# overwritten afterwards; that write must never be treated as an assignment
# on a slice of `monitors_integrated`.
monitors_integrated_plot = monitors_integrated[['siteId', 'source']].copy()

In [None]:
# Replace 'source' with a constant 1 so that summing it counts rows.
monitors_integrated_plot = monitors_integrated_plot.assign(source=1)

In [None]:
# Collapse to one row per site by summing the columns within each siteId.
monitors_integrated_plot = (
    monitors_integrated_plot
    .groupby('siteId')
    .agg('sum')
)

In [None]:
sns.set_theme()

# Set plot dimensions
plt.figure(figsize=(10,10))

# Violin plot of the grouped per-site frame built from API-integrated monitors.
# NOTE(review): the frame is passed positionally; which parameter it binds to
# depends on the installed seaborn version — confirm it is treated as `data`.
sns.violinplot(monitors_integrated_plot)

In [None]:
sns.set_theme()

# Set plot dimensions
plt.figure(figsize=(10, 10))

# One bar per distinct monitors-per-site value; bar height = number of sites.
ax = sns.countplot(x=monitors_integrated_plot['source'], palette='flare')

# Annotate every bar with its count.
for container in ax.containers:
    ax.bar_label(container)

# The y-axis is labelled explicitly, matching the all-monitors plot,
# instead of being left with the default label.
ax.set(
    xlabel='Monitors per Site',
    ylabel='Count of Sites',
    title='Count of monitors per site with API Integration',
)

plt.show()

## 4.6. Checking monitors in faulty sites

In [None]:
# API-integrated monitors whose site is in the faulty-sites list.
faulty_sites_monitors_with_integration = monitors_integrated.loc[
    monitors_integrated['siteId'].isin(faulty_sites_list_of_IDs)
]

In [None]:
# Row count of the API-integrated monitors located at faulty sites.
faulty_sites_monitors_with_integration.shape[0]

In [None]:
# SMA monitors at faulty sites
faulty_SMA = faulty_sites_monitors_with_integration.loc[
    faulty_sites_monitors_with_integration['manufacturerApi'].eq('SMA')
]
# FIMER monitors at faulty sites
faulty_FIMER = faulty_sites_monitors_with_integration.loc[
    faulty_sites_monitors_with_integration['manufacturerApi'].eq('FIMER')
]

In [None]:
# Two-column working frame for the per-faulty-site count.
# An explicit copy is taken because the 'source' column of this frame is
# overwritten afterwards; that write must never be treated as an assignment
# on a slice of `faulty_sites_monitors_with_integration`.
faulty_sites_monitors_to_plot = (
    faulty_sites_monitors_with_integration[['siteId', 'source']].copy()
)

In [None]:
# Replace 'source' with a constant 1 so that summing it counts rows.
faulty_sites_monitors_to_plot = faulty_sites_monitors_to_plot.assign(source=1)

In [None]:
# Collapse to one row per site by summing the columns within each siteId.
faulty_sites_monitors_to_plot = (
    faulty_sites_monitors_to_plot
    .groupby('siteId')
    .agg('sum')
)

In [None]:
# Display the grouped per-site frame for faulty sites.
faulty_sites_monitors_to_plot

In [None]:
sns.set_theme()

# Set plot dimensions
plt.figure(figsize=(10, 10))

# One bar per distinct monitors-per-site value; bar height = number of
# faulty sites with that many API-integrated monitors.
ax = sns.countplot(x=faulty_sites_monitors_to_plot['source'], palette='flare')

# Annotate every bar with its count.
for container in ax.containers:
    ax.bar_label(container)

# The y-axis is labelled explicitly, as the all-monitors plot does,
# instead of being left with the default label.
ax.set(
    xlabel='Monitors per Faulty Site',
    ylabel='Count of Faulty Sites',
    title='Count of monitors per faulty site with API Integration',
)

plt.show()

# 5. Geo Locating

In [None]:
import geoplot as gplt
import geoplot.crs as gcrs
import pandas as pd
import matplotlib.pyplot as plt
import mapclassify as mc
import numpy as np

from shapely.geometry import Point
import geopandas as gpd
from geopandas import GeoDataFrame


%matplotlib inline

In [None]:
# Load the faulty-site monitors CSV.
# A comma separator and no skipped rows are the pandas defaults, so they
# are not spelled out here.
df = pd.read_csv(
    "./faulty_sites/window_size_7days/monitors_faulty_sites.csv",
    low_memory=False,
)

In [None]:
# Restrict the frame to rows whose 'manufacturerApi' is SMA.
df = df.loc[df['manufacturerApi'].eq('SMA')]

In [None]:
# Load the 'naturalearth_lowres' dataset shipped with GeoPandas and pick
# the Australia row out of it.
# NOTE(review): `gpd.datasets` was deprecated in GeoPandas 0.14 and removed in
# 1.0 — confirm the installed version still provides it.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
australia = world.loc[world['name'] == 'Australia'] # get Australia row
boundaries = australia['geometry'] # get Australia geometry

In [None]:
# Build the point geometries from the longitude/latitude columns with one
# vectorised call instead of constructing a shapely Point per row.
geometry = gpd.points_from_xy(df['longitude'], df['latitude'])
gdf = GeoDataFrame(df, geometry=geometry)

# Draw the Australia outline first, then overlay the monitors as red dots
# on the same axes.
base_ax = australia.plot(figsize=(10, 6))
gdf.plot(ax=base_ax, marker='o', color='red', markersize=15)

In [None]:
# Display the current contents of df.
df

In [None]:
# Plain scatter of the monitor coordinates.
df.plot.scatter(x="longitude", y="latitude")

In [None]:
# Same scatter with low opacity so overlapping points show up darker.
df.plot.scatter(x="longitude", y="latitude", alpha=0.1)

In [None]:
# Larger, semi-transparent scatter of the monitor coordinates.
# No colour column (`c=`) and no labelled series are plotted, so neither a
# colour bar nor a legend is drawn: both would have nothing to describe.
df.plot(
    kind="scatter",
    x="longitude",
    y="latitude",
    alpha=0.4,
    figsize=(10, 7),
)
plt.show()