# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

def load_and_clean_data(country):
    """Load ``data/<country>.csv``, flag outlier rows and impute sparse columns.

    Processing steps, in order:

    1. Read the CSV and parse the ``Timestamp`` column into datetimes.
    2. Compute per-column z-scores for the key metrics (population standard
       deviation, missing values ignored) and add a boolean ``is_outlier``
       column that is True for every row where any metric has ``|z| > 3``.
    3. For each key metric whose number of missing values exceeds 5 % of the
       row count, replace the missing values with that column's median.

    Args:
        country: File stem of the CSV located in the ``data/`` directory.

    Returns:
        The loaded DataFrame with parsed timestamps, the added ``is_outlier``
        column and the imputed metric columns.

    Raises:
        FileNotFoundError: If ``data/<country>.csv`` does not exist.
        KeyError: If ``Timestamp`` or one of the key metric columns is missing.
    """
    df = pd.read_csv(f"data/{country}.csv")

    # Convert timestamp
    df['Timestamp'] = pd.to_datetime(df['Timestamp'])

    # Calculate z-scores for key metrics. pandas' mean/std skip NaN, so a
    # single missing reading does not turn the whole column's z-scores into
    # NaN; ddof=0 matches the population z-score definition.
    columns_to_check = ['GHI', 'DNI', 'DHI', 'ModA', 'ModB', 'WS', 'WSgust']
    metrics = df[columns_to_check]
    z_scores = ((metrics - metrics.mean()) / metrics.std(ddof=0)).abs()

    # Flag outliers (z-score > 3). NaN z-scores (missing value or constant
    # column) compare as False, so such cells never flag a row on their own.
    df['is_outlier'] = (z_scores > 3).any(axis=1)

    # Handle missing values: only columns with more than 5 % missing entries
    # are imputed; columns at or below that share keep their NaNs.
    # NOTE(review): confirm that leaving sparse gaps un-imputed is intended.
    missing_limit = len(df) * 0.05
    for col in columns_to_check:
        if df[col].isnull().sum() > missing_limit:
            df[col] = df[col].fillna(df[col].median())

    return df

def generate_visualizations(df, country):
    """Save a GHI time-series plot and a correlation heatmap for one country.

    Two PNG files are written into the ``reports/`` directory, which is
    created if it does not exist yet:

    * ``<country>_ghi_timeseries.png`` - ``GHI`` plotted against ``Timestamp``.
    * ``<country>_correlation.png`` - annotated correlation matrix of
      ``GHI``, ``DNI``, ``DHI``, ``Tamb`` and ``RH``.

    The figures are left open after saving, as before; callers producing many
    plots in a loop may want to call ``plt.close('all')`` afterwards.

    Args:
        df: DataFrame containing the ``Timestamp``, ``GHI``, ``DNI``, ``DHI``,
            ``Tamb`` and ``RH`` columns.
        country: Label used in the plot titles and output file names.

    Raises:
        KeyError: If one of the correlation columns is missing from ``df``.
    """
    import os

    # savefig does not create missing parent directories, so make sure the
    # output folder exists before anything is drawn.
    os.makedirs('reports', exist_ok=True)

    # Time series analysis
    ts_fig, ts_ax = plt.subplots(figsize=(12, 6))
    sns.lineplot(data=df, x='Timestamp', y='GHI', ax=ts_ax)
    ts_ax.set_title(f'GHI Time Series for {country}')
    ts_fig.savefig(f'reports/{country}_ghi_timeseries.png')

    # Correlation heatmap
    corr_fig, corr_ax = plt.subplots(figsize=(10, 8))
    correlations = df[['GHI', 'DNI', 'DHI', 'Tamb', 'RH']].corr()
    sns.heatmap(correlations, annot=True, ax=corr_ax)
    corr_ax.set_title(f'Correlation Matrix for {country}')
    corr_fig.savefig(f'reports/{country}_correlation.png')