# Loading Modules

In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings
from IPython.display import display


# Suppress every warning category for the rest of the kernel session so the
# rendered outputs stay uncluttered.
# NOTE(review): this also hides DeprecationWarning/FutureWarning messages from
# pandas and other libraries — consider narrowing the filter.
warnings.filterwarnings("ignore")

# Loading Data

In [16]:
data_path = 'data'

# Create the data directory if it doesn't exist (no-op when it is already there).
os.makedirs(data_path, exist_ok=True)

# CSV files that must be present in the data directory before loading.
expected_files = [
    f'{data_path}/benin-malanville.csv',
    f'{data_path}/sierraleone-bumbuna.csv',
    f'{data_path}/togo-dapaong_qc.csv',
]

# Download and unpack the archive only when at least one expected file is missing.
if not all(os.path.exists(f) for f in expected_files):
    # Imported lazily: these are only needed for the first-time download.
    import gdown
    import zipfile

    # Google Drive direct-download URL (the `id` query parameter is the file ID).
    file_url = 'https://drive.google.com/uc?id=1wRxR5CROC95Z9vPYlXwHrSGt79Of_zQU'
    destination = f'{data_path}/data.zip'

    # Download the archive from Google Drive using gdown.
    gdown.download(file_url, destination, quiet=True)

    try:
        with zipfile.ZipFile(destination, 'r') as zip_ref:
            # NOTE(review): extracts into the current working directory, not
            # into `data_path` — presumably the archive already contains a
            # top-level `data/` folder; confirm against the archive layout.
            zip_ref.extractall()
    finally:
        # Remove the zip file even if extraction failed, so a corrupt or
        # partial download is not left behind.
        if os.path.exists(destination):
            os.remove(destination)

    # Fail early with a clear message if the archive did not provide the files.
    missing_files = [f for f in expected_files if not os.path.exists(f)]
    if missing_files:
        raise FileNotFoundError(
            f'Expected data files not found after extraction: {missing_files}'
        )


# Parse the `Timestamp` column into datetimes while reading. The previous
# `date_parser='Timestamp'` argument passed a string where a callable is
# expected and, without `parse_dates`, left the column as plain strings.
benin_malaniville_data = pd.read_csv(f'{data_path}/benin-malanville.csv', parse_dates=['Timestamp'])
sierra_leone_bumbuna_data = pd.read_csv(f'{data_path}/sierraleone-bumbuna.csv', parse_dates=['Timestamp'])
togo_dapaong_data = pd.read_csv(f'{data_path}/togo-dapaong_qc.csv', parse_dates=['Timestamp'])

# Exploratory Data Analysis

## Exploring the data

In [17]:
# Preview the first five rows of every dataset, each preceded by its label.
head_preview = []
for dataset_label, dataset in (
    ("Benin Malaniville Data", benin_malaniville_data),
    ("Sierra Leone Bumbuna Data", sierra_leone_bumbuna_data),
    ("Togo Dapaong Data", togo_dapaong_data),
):
    head_preview.extend([dataset_label, dataset.head()])

display(*head_preview)

'Benin Malaniville Data'

Unnamed: 0,Timestamp,GHI,DNI,DHI,ModA,ModB,Tamb,RH,WS,WSgust,WSstdev,WD,WDstdev,BP,Cleaning,Precipitation,TModA,TModB,Comments
0,2021-08-09 00:01,-1.2,-0.2,-1.1,0.0,0.0,26.2,93.4,0.0,0.4,0.1,122.1,0.0,998,0,0.0,26.3,26.2,
1,2021-08-09 00:02,-1.1,-0.2,-1.1,0.0,0.0,26.2,93.6,0.0,0.0,0.0,0.0,0.0,998,0,0.0,26.3,26.2,
2,2021-08-09 00:03,-1.1,-0.2,-1.1,0.0,0.0,26.2,93.7,0.3,1.1,0.5,124.6,1.5,997,0,0.0,26.4,26.2,
3,2021-08-09 00:04,-1.1,-0.1,-1.0,0.0,0.0,26.2,93.3,0.2,0.7,0.4,120.3,1.3,997,0,0.0,26.4,26.3,
4,2021-08-09 00:05,-1.0,-0.1,-1.0,0.0,0.0,26.2,93.3,0.1,0.7,0.3,113.2,1.0,997,0,0.0,26.4,26.3,


'Sierra Leone Bumbuna Data'

Unnamed: 0,Timestamp,GHI,DNI,DHI,ModA,ModB,Tamb,RH,WS,WSgust,WSstdev,WD,WDstdev,BP,Cleaning,Precipitation,TModA,TModB,Comments
0,2021-10-30 00:01,-0.7,-0.1,-0.8,0.0,0.0,21.9,99.1,0.0,0.0,0.0,0.0,0.0,1002,0,0.0,22.3,22.6,
1,2021-10-30 00:02,-0.7,-0.1,-0.8,0.0,0.0,21.9,99.2,0.0,0.0,0.0,0.0,0.0,1002,0,0.0,22.3,22.6,
2,2021-10-30 00:03,-0.7,-0.1,-0.8,0.0,0.0,21.9,99.2,0.0,0.0,0.0,0.0,0.0,1002,0,0.0,22.3,22.6,
3,2021-10-30 00:04,-0.7,0.0,-0.8,0.0,0.0,21.9,99.3,0.0,0.0,0.0,0.0,0.0,1002,0,0.1,22.3,22.6,
4,2021-10-30 00:05,-0.7,-0.1,-0.8,0.0,0.0,21.9,99.3,0.0,0.0,0.0,0.0,0.0,1002,0,0.0,22.3,22.6,


'Togo Dapaong Data'

Unnamed: 0,Timestamp,GHI,DNI,DHI,ModA,ModB,Tamb,RH,WS,WSgust,WSstdev,WD,WDstdev,BP,Cleaning,Precipitation,TModA,TModB,Comments
0,2021-10-25 00:01,-1.3,0.0,0.0,0.0,0.0,24.8,94.5,0.9,1.1,0.4,227.6,1.1,977,0,0.0,24.7,24.4,
1,2021-10-25 00:02,-1.3,0.0,0.0,0.0,0.0,24.8,94.4,1.1,1.6,0.4,229.3,0.7,977,0,0.0,24.7,24.4,
2,2021-10-25 00:03,-1.3,0.0,0.0,0.0,0.0,24.8,94.4,1.2,1.4,0.3,228.5,2.9,977,0,0.0,24.7,24.4,
3,2021-10-25 00:04,-1.2,0.0,0.0,0.0,0.0,24.8,94.3,1.2,1.6,0.3,229.1,4.6,977,0,0.0,24.7,24.4,
4,2021-10-25 00:05,-1.2,0.0,0.0,0.0,0.0,24.8,94.0,1.3,1.6,0.4,227.5,1.6,977,0,0.0,24.7,24.4,


In [18]:
# Report the (rows, columns) shape of every dataset, each preceded by its label.
shape_report = []
for dataset_label, dataset in (
    ("Benin Malaniville Data", benin_malaniville_data),
    ("Sierra Leone Bumbuna Data", sierra_leone_bumbuna_data),
    ("Togo Dapaong Data", togo_dapaong_data),
):
    shape_report.extend([dataset_label, dataset.shape])

display(*shape_report)

'Benin Malaniville Data'

(525600, 19)

'Sierra Leone Bumbuna Data'

(525600, 19)

'Togo Dapaong Data'

(525600, 19)

Each of the datasets contains 19 columns and 525,600 rows, which is consistent with one year of one-minute readings (365 × 24 × 60 = 525,600).

Description of the columns:

- GHI (W/m²): Global Horizontal Irradiance, the total solar radiation received per square meter on a horizontal surface.
- DNI (W/m²): Direct Normal Irradiance, the amount of solar radiation received per square meter on a surface perpendicular to the rays of the sun.
- DHI (W/m²): Diffuse Horizontal Irradiance, solar radiation received per square meter on a horizontal surface that does not arrive on a direct path from the sun.
- ModA (W/m²): Measurements from a module or sensor (A), similar to irradiance.
- ModB (W/m²): Measurements from a module or sensor (B), similar to irradiance.
- Tamb (°C): Ambient Temperature in degrees Celsius.
- RH (%): Relative Humidity as a percentage of moisture in the air.
- WS (m/s): Wind Speed in meters per second.
- WSgust (m/s): Maximum Wind Gust Speed in meters per second.
- WSstdev (m/s): Standard Deviation of Wind Speed, indicating variability.
- WD (°N (to east)): Wind Direction in degrees from north.
- WDstdev: Standard Deviation of Wind Direction, showing directional variability.
- BP (hPa): Barometric Pressure in hectopascals.
- Cleaning (1 or 0): Signifying whether cleaning (possibly of the modules or sensors) occurred.
- Precipitation (mm/min): Precipitation rate measured in millimeters per minute.
- TModA (°C): Temperature of Module A in degrees Celsius.
- TModB (°C): Temperature of Module B in degrees Celsius.
- Comments: This column is designed for any additional notes.


## Summary Statistics: