# Exploratory Data Analysis (EDA) - Benin (Malanville) Solar Data
#
# **Objective:** Profile, clean, and explore the solar dataset for Malanville, Benin, to understand its characteristics, identify trends, and extract insights for potential solar investments.
#
# **Business Objective Context:** MoonLight Energy Solutions aims to enhance operational efficiency and sustainability through targeted solar investments. This EDA will help identify high-potential regions by analyzing environmental measurement data.

## 1. Setup and Data Loading

In [3]:
#  1. Setup and Data Loading
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import zscore
import os # For creating directory if it doesn't exist

# Global plotting defaults for the figures in this notebook.
# NOTE(review): the 'seaborn-v0_8-*' style names presumably require a recent
# matplotlib release — confirm against the pinned environment.
plt.style.use('seaborn-v0_8-whitegrid') # matplotlib style sheet: seaborn-like look with a white grid background
sns.set_palette("viridis") # default seaborn color palette; applied after the style sheet


In [4]:
# Location of the raw input CSV, relative to the notebook's working directory.
DATA_DIR = '../data/'
FILE_NAME = 'benin-malanville.csv'
data_path = os.path.join(DATA_DIR, FILE_NAME)

# Destination folder for the cleaned export (currently the same folder as the raw data).
CLEANED_DATA_DIR = '../data/'
# exist_ok=True turns this into a no-op when the directory already exists, which
# avoids the check-then-create race of a separate os.path.exists() test and keeps
# the cell safe to re-run.
os.makedirs(CLEANED_DATA_DIR, exist_ok=True)
cleaned_file_path = os.path.join(CLEANED_DATA_DIR, 'benin-malanville_clean.csv')

In [5]:
# Start from the "not loaded" state; df_benin is only replaced when the read
# succeeds, so later cells can test `df_benin is not None` before using it.
df_benin = None
try:
    df_benin = pd.read_csv(data_path)
except FileNotFoundError:
    # Most likely cause: the CSV has not been placed in the data directory.
    print(f"Error: {data_path} not found. Ensure the file is in the '{DATA_DIR}' directory.")
except Exception as e:
    # Any other read/parse failure is reported and leaves df_benin as None.
    print(f"An error occurred while loading the file: {e}")
else:
    print(f"Successfully loaded {data_path}")

Successfully loaded ../data/benin-malanville.csv


 ## 2. Initial Data Inspection

In [6]:
if df_benin is None:
    # The load step left no frame behind, so there is nothing to inspect.
    print("DataFrame not loaded. Skipping initial inspection.")
else:
    # Peek at both ends of the frame.
    print("--- First 5 Rows ---")
    display(df_benin.head())

    print("\n--- Last 5 Rows ---")
    display(df_benin.tail())

    # info() prints its own summary, so it is called without display().
    print("\n--- Data Info ---")
    df_benin.info()

    # Overall dimensions of the frame.
    print(f"\n--- Data Shape ---")
    print(f"Rows: {df_benin.shape[0]}, Columns: {df_benin.shape[1]}")

    # Per-column dtypes, rendered as a Series.
    print("\n--- Data Types ---")
    display(df_benin.dtypes)

--- First 5 Rows ---


Unnamed: 0,Timestamp,GHI,DNI,DHI,ModA,ModB,Tamb,RH,WS,WSgust,WSstdev,WD,WDstdev,BP,Cleaning,Precipitation,TModA,TModB,Comments
0,2021-08-09 00:01,-1.2,-0.2,-1.1,0.0,0.0,26.2,93.4,0.0,0.4,0.1,122.1,0.0,998,0,0.0,26.3,26.2,
1,2021-08-09 00:02,-1.1,-0.2,-1.1,0.0,0.0,26.2,93.6,0.0,0.0,0.0,0.0,0.0,998,0,0.0,26.3,26.2,
2,2021-08-09 00:03,-1.1,-0.2,-1.1,0.0,0.0,26.2,93.7,0.3,1.1,0.5,124.6,1.5,997,0,0.0,26.4,26.2,
3,2021-08-09 00:04,-1.1,-0.1,-1.0,0.0,0.0,26.2,93.3,0.2,0.7,0.4,120.3,1.3,997,0,0.0,26.4,26.3,
4,2021-08-09 00:05,-1.0,-0.1,-1.0,0.0,0.0,26.2,93.3,0.1,0.7,0.3,113.2,1.0,997,0,0.0,26.4,26.3,



--- Last 5 Rows ---


Unnamed: 0,Timestamp,GHI,DNI,DHI,ModA,ModB,Tamb,RH,WS,WSgust,WSstdev,WD,WDstdev,BP,Cleaning,Precipitation,TModA,TModB,Comments
525595,2022-08-08 23:56,-5.5,-0.1,-5.9,0.0,0.0,23.1,98.3,0.3,1.1,0.5,119.3,4.1,996,0,0.0,23.5,22.9,
525596,2022-08-08 23:57,-5.5,-0.1,-5.8,0.0,0.0,23.1,98.3,0.2,0.7,0.4,115.2,2.3,996,0,0.0,23.5,22.9,
525597,2022-08-08 23:58,-5.5,-0.1,-5.8,0.0,0.0,23.1,98.4,0.6,1.1,0.5,129.8,3.4,996,0,0.0,23.5,22.9,
525598,2022-08-08 23:59,-5.5,-0.1,-5.8,0.0,0.0,23.1,98.3,0.9,1.3,0.5,124.4,4.3,996,0,0.0,23.5,22.9,
525599,2022-08-09 00:00,-5.5,-0.1,-5.7,0.0,0.0,23.1,98.3,1.2,1.6,0.3,124.1,5.9,996,0,0.0,23.5,22.9,



--- Data Info ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 525600 entries, 0 to 525599
Data columns (total 19 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   Timestamp      525600 non-null  object 
 1   GHI            525600 non-null  float64
 2   DNI            525600 non-null  float64
 3   DHI            525600 non-null  float64
 4   ModA           525600 non-null  float64
 5   ModB           525600 non-null  float64
 6   Tamb           525600 non-null  float64
 7   RH             525600 non-null  float64
 8   WS             525600 non-null  float64
 9   WSgust         525600 non-null  float64
 10  WSstdev        525600 non-null  float64
 11  WD             525600 non-null  float64
 12  WDstdev        525600 non-null  float64
 13  BP             525600 non-null  int64  
 14  Cleaning       525600 non-null  int64  
 15  Precipitation  525600 non-null  float64
 16  TModA          525600 non-null  float64
 17  TModB     

Timestamp         object
GHI              float64
DNI              float64
DHI              float64
ModA             float64
ModB             float64
Tamb             float64
RH               float64
WS               float64
WSgust           float64
WSstdev          float64
WD               float64
WDstdev          float64
BP                 int64
Cleaning           int64
Precipitation    float64
TModA            float64
TModB            float64
Comments         float64
dtype: object