In [7]:
# READING THE Q2 NOTEBOOK TO FURTHER ANALYSIS

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
import os
import pickle
warnings.filterwarnings('ignore')

# Graphic settings
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
%matplotlib inline

print("Libraries imported successfully")
df = pd.read_csv('data/turin_solar_data_20260213_160518.csv')

print(f"Loaded Dataset {df.shape[0]} rows, {df.shape[1]} columns")

print("=" * 60)
print("LOADING ALL VARIABLES FROM Q1")
print("=" * 60)

# Load the complete dictionary from Q1
%store -r all_variables

# Check if loaded successfully
if 'all_variables' in dir() and all_variables is not None:
    print(f"Loaded all_variables dictionary with {len(all_variables)} variables")
    
    # Extract ALL variables from Q1 with safe defaults
    df = all_variables.get('df')
    total = all_variables.get('total', 0)
    clear_sky = all_variables.get('clear_sky', 0)
    partly_cloudy = all_variables.get('partly_cloudy', 0)
    cloudy = all_variables.get('cloudy', 0)
    daily_cloud = all_variables.get('daily_cloud')
    uv_positive = all_variables.get('uv_positive', 0)
    daily_uv = all_variables.get('daily_uv')
    calm = all_variables.get('calm', 0)
    light = all_variables.get('light', 0)
    moderate = all_variables.get('moderate', 0)
    strong = all_variables.get('strong', 0)
    hourly_angle = all_variables.get('hourly_angle')
    zero_potential = all_variables.get('zero_potential', 0)
    daily_potential = all_variables.get('daily_potential')
    worst_hour = all_variables.get('worst_hour')
    worst_day_data = all_variables.get('worst_day_data')
    output_dir = all_variables.get('output_dir', 'notebooks_output')
    
    # Temperature variables
    avg_temp = all_variables.get('avg_temp')
    max_temp = all_variables.get('max_temp')
    min_temp = all_variables.get('min_temp')
    
    print(f"\n Extracted variables from Q1")
    print(f"   DataFrame: {len(df) if df is not None else 0} rows")
    print(f"   Output directory: {output_dir}")
    
    # Safely print temperature
    if avg_temp is not None:
        print(f"   Average temperature: {avg_temp:.1f}째C")
    else:
        print(f"   Average temperature: Not available")
        
    if max_temp is not None:
        print(f"   Maximum temperature: {max_temp:.1f}째C")
    
    if min_temp is not None:
        print(f"   Minimum temperature: {min_temp:.1f}째C")
else:
    print("Failed to load variables from Q1")
    print("Please run Q1 notebook first")
    print("\nCreating empty dataframe to avoid errors...")
    df = pd.DataFrame()
    output_dir = "notebooks_output"

Libraries imported successfully
Loaded Dataset 28 rows, 15 columns
LOADING ALL VARIABLES FROM Q1
Loaded all_variables dictionary with 28 variables

 Extracted variables from Q1
   DataFrame: 28 rows
   Output directory: notebooks_output
   Average temperature: Not available


In [8]:
# Cell 2: Verify we have the data we need
print("=" * 60)
print("VERIFYING REQUIRED DATA")
print("=" * 60)

required_columns = ['solar_angle', 'cloudcover', 'temperature', 'uv_index', 'hour']

if df is not None and not df.empty:
    missing_columns = [col for col in required_columns if col not in df.columns]
    
    if missing_columns:
        print(f"   Missing required columns: {missing_columns}")
    else:
        print(f"   All required columns present")
        print(f"   Total rows: {len(df)}")
        print(f"   Date range: {df['date'].min()} to {df['date'].max()}")
else:
    print(" DataFrame is empty or None")

VERIFYING REQUIRED DATA
   All required columns present
   Total rows: 28
   Date range: 2026-02-01 to 2026-02-28


In [4]:
# Define solar panel parameters

print("=" * 60)
print("SOLAR PANEL CONFIGURATION")
print("=" * 60)

panel_power_kw = 3.0        # 3 kWp typical for Italian home
panel_efficiency = 0.19     # 19% efficiency
system_losses = 0.14        # 14% losses (inverter, wiring, dirt)
temp_loss_coeff = 0.004     # 0.4% loss per degree above 25째C
optimal_angle = 35          # degrees from horizontal
orientation = "South"      # orientation
derating_factor = 0.85      # additional derating for real-world conditions

print(f"{'panel_power_kw':20s}: {panel_power_kw} kWp")
print(f"{'panel_efficiency':20s}: {panel_efficiency*100:.2f}%")
print(f"{'system_losses':20s}: {system_losses*100:.2f}%")
print(f"{'temp_loss_coeff':20s}: {temp_loss_coeff*100:.2f}%")
print(f"{'optimal_angle':20s}: {optimal_angle}")
print(f"{'orientation':20s}: {orientation}")
print(f"{'derating_factor':20s}: {derating_factor*100:.2f}%")

SOLAR PANEL CONFIGURATION
panel_power_kw      : 3.0 kWp
panel_efficiency    : 19.00%
system_losses       : 14.00%
temp_loss_coeff     : 0.40%
optimal_angle       : 35
orientation         : South
derating_factor     : 85.00%


In [19]:
# Calculate hourly production
print("=" * 60)
print("CALCULATING HOURLY PRODUCTION (kWh)")
print("=" * 60)

def calculate_hourly_production(row):
    """Calculate kWh produced in a given hour"""
    
    # Solar angle factor
    solar_factor = row['solar_angle']
    
    # Cloud factor - FIXED: Always define cloud_factor
    if 'cloud_factor' in row.index and pd.notna(row['cloud_factor']):
        cloud_factor = row['cloud_factor']
    else:
        cloud_factor = 1 - (row['cloudcover'] / 100)
        cloud_factor = max(0.1, min(1, cloud_factor))  # Clip between 0.1 and 1
    
    # Temperature efficiency loss
    if row['temperature'] > 25:
        temp_efficiency = 1 - (row['temperature'] - 25) * PANEL_PARAMS['temp_loss_coeff']
    else:
        temp_efficiency = 1.0
    
    # UV factor (solar intensity) - FIXED: Handle missing uv_factor
    if 'uv_factor' in row.index and pd.notna(row['uv_factor']):
        uv_factor = row['uv_factor']
    else:
        uv_factor = min(1, row['uv_index'] / 10)
    
    # Combined production factor
    production_factor = solar_factor * cloud_factor * temp_efficiency * uv_factor
    
    # Apply derating factor for real-world conditions
    production_factor = production_factor * PANEL_PARAMS['derating_factor']
    
    # Actual kWh produced
    hourly_kwh = (PANEL_PARAMS['panel_power_kw'] * 
                  production_factor * 
                  PANEL_PARAMS['panel_efficiency'] * 
                  (1 - PANEL_PARAMS['system_losses']))
    
    return max(0, hourly_kwh)

# Apply calculation
df['hourly_kwh'] = df.apply(calculate_hourly_production, axis=1)

print(f"\nHourly production calculated")
print(f"   Range: {df['hourly_kwh'].min():.3f} - {df['hourly_kwh'].max():.3f} kWh")
print(f"   Average hourly: {df['hourly_kwh'].mean():.3f} kWh")
print(f"   Total in dataset: {df['hourly_kwh'].sum():.2f} kWh")

CALCULATING HOURLY PRODUCTION (kWh)

Hourly production calculated
   Range: 0.000 - 0.943 kWh
   Average hourly: 0.368 kWh
   Total in dataset: 10.29 kWh


In [21]:
# Daily production summary

print("\n" + "=" * 60)
print("DAILY PRODUCTION SUMMARY")
print("=" * 60)

# Daily production
daily_production = df.groupby('date')['hourly_kwh'].sum().reset_index()
daily_production.columns = ['date', 'kwh']

print(f"\nDaily production (February):")
print(f"   Average: {daily_production['kwh'].mean():.2f} kWh/day")
print(f"   Maximum: {daily_production['kwh'].max():.2f} kWh/day")
print(f"   Minimum: {daily_production['kwh'].min():.2f} kWh/day")

# Best and worst days
best_day = daily_production.loc[daily_production['kwh'].idxmax()]
worst_day = daily_production.loc[daily_production['kwh'].idxmin()]

print(f"\nBest day: {best_day['date']} with {best_day['kwh']:.2f} kWh")
print(f"Worst day: {worst_day['date']} with {worst_day['kwh']:.2f} kWh")

if worst_day['kwh'] > 0:
    ratio = best_day['kwh'] / worst_day['kwh']
    print(f"Ratio best/worst: {ratio:.1f}x")
else:
    print(f"Ratio best/worst: Undefined (zero production day)")


DAILY PRODUCTION SUMMARY

Daily production (February):
   Average: 0.37 kWh/day
   Maximum: 0.94 kWh/day
   Minimum: 0.00 kWh/day

Best day: 2026-02-25 with 0.94 kWh
Worst day: 2026-02-04 with 0.00 kWh
Ratio best/worst: Undefined (zero production day)


In [22]:
# Cell 6: Monthly production (February)
print("\n" + "=" * 60)
print("FEBRUARY PRODUCTION TOTAL")
print("=" * 60)

february_total = daily_production['kwh'].sum()
february_days = len(daily_production)

print(f"\nTotal February production: {february_total:.2f} kWh")
print(f"Days in February: {february_days}")
print(f"Average daily: {february_total/february_days:.2f} kWh/day")


FEBRUARY PRODUCTION TOTAL

Total February production: 10.29 kWh
Days in February: 28
Average daily: 0.37 kWh/day


In [23]:
# Production by hour of day

print("\n" + "=" * 60)
print("PRODUCTION BY HOUR OF DAY")
print("=" * 60)

# Group by hour
hourly_avg = df.groupby('hour')['hourly_kwh'].mean()

print("Average production by hour:")
for hour in sorted(hourly_avg.index):
    if hourly_avg[hour] > 0:
        print(f"   {hour:02d}:00 - {hourly_avg[hour]:.3f} kWh")

peak_hour = hourly_avg.idxmax()
print(f"\nPeak production hour: {peak_hour}:00 ({hourly_avg.max():.3f} kWh)")

# Total productive hours
productive_hours = len(df[df['hourly_kwh'] > 0])
print(f"Total productive hours in February: {productive_hours}")
print(f"Average productive hours per day: {productive_hours/february_days:.1f} hours")


PRODUCTION BY HOUR OF DAY
Average production by hour:
   12:00 - 0.381 kWh

Peak production hour: 12:00 (0.381 kWh)
Total productive hours in February: 25
Average productive hours per day: 0.9 hours


In [24]:
# Create production summary dictionary for Q3

print("\n" + "=" * 60)
print("CREATING PRODUCTION SUMMARY FOR Q3")
print("=" * 60)

production_data = {
    # Daily production
    'daily_production_df': daily_production,
    'avg_daily_kwh': daily_production['kwh'].mean(),
    'max_daily_kwh': daily_production['kwh'].max(),
    'min_daily_kwh': daily_production['kwh'].min(),
    'best_day': best_day['date'],
    'best_day_value': best_day['kwh'],
    'worst_day': worst_day['date'],
    'worst_day_value': worst_day['kwh'],
    
    # February totals
    'february_total_kwh': february_total,
    'february_days': february_days,
    
    # Annual estimates - TO DO
    # 'annual_estimate_kwh': annual_production,
    # 'monthly_breakdown': monthly_breakdown,
    # 'seasonal_factors': seasonal_factors,
    
    # Hourly patterns
    'hourly_avg_production': hourly_avg,
    'peak_hour': peak_hour,
    'peak_hour_value': hourly_avg.max(),
    'productive_hours': productive_hours,
    'productive_hours_per_day': productive_hours/february_days,
    
    # Panel parameters used
    'panel_params': PANEL_PARAMS,
    
    # Data with production
    'df_with_production': df
}

print(f"  Created production_data dictionary with {len(production_data)} items")
# print(f"   Annual estimate: {annual_production:.0f} kWh")
print(f"   Average daily: {daily_production['kwh'].mean():.2f} kWh")


CREATING PRODUCTION SUMMARY FOR Q3
  Created production_data dictionary with 17 items
   Average daily: 0.37 kWh


In [2]:
# SAVE ALL VARIABLES FOR Q3

print("=" * 60)
print("DEFINING ALL REQUIRED VARIABLES FOR Q3")
print("=" * 60)

# Average daily kwh
if 'avg_daily_kwh' not in dir():
    if 'daily_production' in dir() and isinstance(daily_production, pd.DataFrame):
        avg_daily_kwh = daily_production['kwh'].mean()
        print(f"avg_daily_kwh = {avg_daily_kwh:.2f} kWh")
    elif 'avg_daily_feb' in dir():
        avg_daily_kwh = avg_daily_feb
        print(f"avg_daily_kwh = {avg_daily_kwh:.2f} kWh (from avg_daily_feb)")
    else:
        avg_daily_kwh = 10
        print(f"Using fallback avg_daily_kwh = {avg_daily_kwh:.2f} kWh")

# February total
if 'february_total_kwh' not in dir():
    if 'february_total' in dir():
        february_total_kwh = february_total
        print(f"february_total_kwh = {february_total_kwh:.2f} kWh")
    elif 'monthly_production' in dir():
        february_total_kwh = monthly_production
        print(f"february_total_kwh = {february_total_kwh:.2f} kWh (from monthly_production)")
    elif 'daily_production' in dir() and isinstance(daily_production, pd.DataFrame):
        february_total_kwh = daily_production['kwh'].sum()
        print(f"february_total_kwh = {february_total_kwh:.2f} kWh")
    else:
        february_total_kwh = 280
        print(f"Using fallback february_total_kwh = {february_total_kwh:.2f} kWh")

# Daily production dataframe
if 'daily_production' not in dir():
    if 'df' in dir() and 'hourly_kwh' in df.columns:
        daily_production = df.groupby('date')['hourly_kwh'].sum().reset_index()
        daily_production.columns = ['date', 'kwh']
        print(f"Created daily_production dataframe with {len(daily_production)} days")
    else:
        # Create empty dataframe as fallback
        import pandas as pd
        daily_production = pd.DataFrame({'date': ['2026-02-01'], 'kwh': [10]})
        print(f"Using fallback daily_production")

# Peak hour
if 'peak_hour' not in dir():
    if 'hourly_avg' in dir():
        peak_hour = hourly_avg.idxmax()
        print(f"peak_hour = {peak_hour}:00")
    elif 'hourly_avg_production' in dir():
        peak_hour = hourly_avg_production.idxmax()
        print(f"peak_hour = {peak_hour}:00")
    else:
        peak_hour = 12
        print(f"Using fallback peak_hour = {peak_hour}:00")

# Productive hours
if 'productive_hours' not in dir():
    if 'df' in dir() and 'hourly_kwh' in df.columns:
        productive_hours = len(df[df['hourly_kwh'] > 0])
        print(f"productive_hours = {productive_hours}")
    else:
        productive_hours = 200
        print(f"Using fallback productive_hours = {productive_hours}")

# Now update production_data dictionary with all these variables
if 'production_data' in dir():
    production_data['avg_daily_kwh'] = avg_daily_kwh
    production_data['february_total_kwh'] = february_total_kwh
    production_data['daily_production_df'] = daily_production
    production_data['peak_hour'] = peak_hour
    production_data['productive_hours'] = productive_hours
    print("\nUpdated production_data with all variables")
else:
    # Create production_data if it doesn't exist
    production_data = {
        'avg_daily_kwh': avg_daily_kwh,
        'february_total_kwh': february_total_kwh,
        'daily_production_df': daily_production,
        'peak_hour': peak_hour,
        'productive_hours': productive_hours,
        'panel_params': PANEL_PARAMS if 'PANEL_PARAMS' in dir() else {'panel_power_kw': 3.0}
    }
    print("\nCreated production_data with all variables")

# Save everything again
%store production_data
%store avg_daily_kwh
%store february_total_kwh
%store daily_production
%store peak_hour
%store productive_hours

print("\nAll variables saved for Q3")

DEFINING ALL REQUIRED VARIABLES FOR Q3
Using fallback avg_daily_kwh = 10.00 kWh
Using fallback february_total_kwh = 280.00 kWh
Using fallback daily_production
Using fallback peak_hour = 12:00
Using fallback productive_hours = 200

Created production_data with all variables
Stored 'production_data' (dict)
Stored 'avg_daily_kwh' (int)
Stored 'february_total_kwh' (int)
Stored 'daily_production' (DataFrame)
Stored 'peak_hour' (int)
Stored 'productive_hours' (int)

All variables saved for Q3


In [3]:
# verify all variables exist before saving
print("=" * 60)
print("VERIFYING ALL VARIABLES BEFORE SAVING")
print("=" * 60)

# List all key variables and check if they exist
key_vars_to_check = {
    'annual_estimate_kwh': annual_estimate_kwh if 'annual_estimate_kwh' in dir() else 'NOT DEFINED',
    'avg_daily_kwh': avg_daily_kwh if 'avg_daily_kwh' in dir() else 'NOT DEFINED',
    'february_total_kwh': february_total_kwh if 'february_total_kwh' in dir() else 'NOT DEFINED',
    'daily_production': daily_production if 'daily_production' in dir() else 'NOT DEFINED',
    'peak_hour': peak_hour if 'peak_hour' in dir() else 'NOT DEFINED',
    'productive_hours': productive_hours if 'productive_hours' in dir() else 'NOT DEFINED'
}

for var_name, var_value in key_vars_to_check.items():
    if var_value != 'NOT DEFINED':
        print(f" 200 {var_name} = {var_value}")
    else:
        print(f" ERROR {var_name} is NOT DEFINED")

# Make sure production_data contains annual_estimate_kwh
if 'production_data' in dir():
    if 'annual_estimate_kwh' not in production_data:
        production_data['annual_estimate_kwh'] = annual_estimate_kwh
        print(" Added annual_estimate_kwh to production_data")
    
    # Verify production_data has all needed fields
    required_fields = ['annual_estimate_kwh', 'avg_daily_kwh', 'panel_params']
    for field in required_fields:
        if field not in production_data:
            print(f" {field} missing from production_data")

VERIFYING ALL VARIABLES BEFORE SAVING
 ERROR annual_estimate_kwh is NOT DEFINED
 200 avg_daily_kwh = 10
 200 february_total_kwh = 280


ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().