In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Graphic settings
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
%matplotlib inline

print("Libraries imported successfully")
df = pd.read_csv('turin_solar_data_20260213_160518.csv')

print(f"Loaded Dataset {df.shape[0]} rows, {df.shape[1]} columns")

âœ… Libraries imported successfully
Loaded Dataset 28 rows, 15 columns


In [11]:
df.head(5)

Unnamed: 0,city,temperature,humidity,wind_speed,cloudcover,uv_index,observation_time,date,hour,month,solar_angle,cloud_factor,temp_efficiency,uv_factor,solar_potential
0,Turin,2.0,70,3,80,0.1,2026-02-01 12:00:00,2026-02-01,12,2,0.42,0.2,1.02,0.1,0.08568
1,Turin,3.0,68,4,10,1.2,2026-02-02 12:00:00,2026-02-02,12,2,0.44,1.0,1.02,1.2,0.4488
2,Turin,4.0,65,5,0,1.6,2026-02-03 12:00:00,2026-02-03,12,2,0.46,1.0,1.01,1.6,0.4646
3,Turin,3.5,72,6,100,0.0,2026-02-04 12:00:00,2026-02-04,12,2,0.48,0.0,1.02,0.0,0.0
4,Turin,5.0,60,4,20,1.4,2026-02-05 12:00:00,2026-02-05,12,2,0.5,0.9,1.01,1.4,0.4545


In [12]:
print(df.columns.tolist())

['city', 'temperature', 'humidity', 'wind_speed', 'cloudcover', 'uv_index', 'observation_time', 'date', 'hour', 'month', 'solar_angle', 'cloud_factor', 'temp_efficiency', 'uv_factor', 'solar_potential']


In [13]:
df.dtypes

city                 object
temperature         float64
humidity              int64
wind_speed            int64
cloudcover            int64
uv_index            float64
observation_time     object
date                 object
hour                  int64
month                 int64
solar_angle         float64
cloud_factor        float64
temp_efficiency     float64
uv_factor           float64
solar_potential     float64
dtype: object

In [38]:
# ============================================
# Q1: Preliminary Meteorological Analysis
# ============================================

print("=" * 50)
print("WEATHER STATISTICS TURIN - FEBRUARY 2026")
print("=" * 50)

WEATHER STATISTICS TURIN - FEBRUARY 2026


In [44]:
print("=" * 60)
print("TEMPERATURE STATISTICS")
print("=" * 60)

print(f"\nTEMPERATURE:")
print(f"   - Average: {df['temperature'].mean():.1f}Â°C")
print(f"   - Maximum: {df['temperature'].max():.1f}Â°C")
print(f"   - Minimum: {df['temperature'].min():.1f}Â°C")

# Temperature distribution by percentiles
print(f"\nTEMPERATURE PERCENTILES:")
percentiles = [0, 10, 25, 50, 75, 90, 100]
for p in percentiles:
    print(f"   {p:3d}th percentile: {df['temperature'].quantile(p/100):.1f}Â°C")

TEMPERATURE STATISTICS

TEMPERATURE:
   - Average: 9.0Â°C
   - Maximum: 14.0Â°C
   - Minimum: 2.0Â°C

TEMPERATURE PERCENTILES:
     0th percentile: 2.0Â°C
    10th percentile: 3.9Â°C
    25th percentile: 6.4Â°C
    50th percentile: 9.2Â°C
    75th percentile: 12.0Â°C
    90th percentile: 13.0Â°C
   100th percentile: 14.0Â°C


In [45]:
print("=" * 60)
print("CLOUD COVER STATISTICS")
print("=" * 60)

print(f"\nCLOUD COVER:")
print(f"   - Average: {df['cloudcover'].mean():.1f}")
print(f"   - Minimum: {df['cloudcover'].min():.1f}")
print(f"   - Maximum: {df['cloudcover'].max():.1f}")
print(f"   - Average: {df['cloudcover'].mean():.1f}")
print(f"   - Std Deviation: {df['cloudcover'].std():.1f}")
print(f"   - Median: {df['cloudcover'].median():.1f}")
print(f"   - Clear sky hours (0-20%): {len(df[df['cloudcover'] <= 20])} hours")
print(f"   - Cloudy hours (>60%): {len(df[df['cloudcover'] > 60])} hours")

# Cloud cover categories
clear_sky = len(df[df['cloudcover'] <= 20])
partly_cloudy = len(df[(df['cloudcover'] > 20) & (df['cloudcover'] <= 60)])
cloudy = len(df[df['cloudcover'] > 60])
total = len(df)

# cloud cover distribution
print(f"\nCloud Cover Distribution:")
print(f"   Clear sky (0-20%):     {clear_sky:4d} hours ({clear_sky/total*100:5.1f}%)")
print(f"   Partly cloudy (21-60%): {partly_cloudy:4d} hours ({partly_cloudy/total*100:5.1f}%)")
print(f"   Cloudy (>60%):          {cloudy:4d} hours ({cloudy/total*100:5.1f}%)")

# Daily cloud cover compared by the dates
daily_cloud = df.groupby('date')['cloudcover'].mean()
print(f"\n Daily Cloud Cover:")
print(f"   Average daily cloud cover: {daily_cloud.mean():.1f}%")
print(f"   Most cloudy day: {daily_cloud.idxmax()} ({daily_cloud.max():.1f}%)")
print(f"   Least cloudy day: {daily_cloud.idxmin()} ({daily_cloud.min():.1f}%)")

CLOUD COVER STATISTICS

CLOUD COVER:
   - Average: 34.3
   - Minimum: 0.0
   - Maximum: 100.0
   - Average: 34.3
   - Std Deviation: 33.4
   - Median: 22.5
   - Clear sky hours (0-20%): 14 hours
   - Cloudy hours (>60%): 6 hours

Cloud Cover Distribution:
   Clear sky (0-20%):       14 hours ( 50.0%)
   Partly cloudy (21-60%):    8 hours ( 28.6%)
   Cloudy (>60%):             6 hours ( 21.4%)

 Daily Cloud Cover:
   Average daily cloud cover: 34.3%
   Most cloudy day: 2026-02-04 (100.0%)
   Least cloudy day: 2026-02-03 (0.0%)


In [46]:
print("=" * 60)
print("UV INDEX STATISTICS")
print("=" * 60)

print(f"\nUV INDEX:")
print(f"   - Average: {df['uv_index'].mean():.2f}")
print(f"   - Minimum: {df['uv_index'].min()}")
print(f"   - Maximum: {df['uv_index'].max()}")
print(f"   - Std Deviation: {df['uv_index'].std():.3f}")
print(f"   - Median: {df['uv_index'].median()}")

# Hours with UV > 0
uv_positive = len(df[df['uv_index'] > 0])
print(f"\nUV Activity:")
print(f"   Hours with UV > 0: {uv_positive} ({uv_positive/total*100:.1f}% of time)")
print(f"   Hours with UV = 0: {total - uv_positive} ({(total-uv_positive)/total*100:.1f}% of time)")

# UV by category (if applicable)
print(f"\nUV Intensity Distribution:")
uv_categories = {
    'Low (0-2)': len(df[df['uv_index'] <= 2]),
    'Moderate (2-5)': len(df[(df['uv_index'] > 2) & (df['uv_index'] <= 5)]),
    'High (>5)': len(df[df['uv_index'] > 5])
}

daily_uv = df.groupby('date')['uv_index'].mean()
print(f"\n Daily UV:")
print(f"   Average daily UV ratio: {daily_uv.mean():.1f}%")
print(f"   Most UV ratio: {daily_uv.idxmax()} ({daily_uv.max():.1f}%)")
print(f"   Least UV ratio: {daily_uv.idxmin()} ({daily_uv.min():.1f}%)")

UV INDEX STATISTICS

UV INDEX:
   - Average: 1.46
   - Minimum: 0.0
   - Maximum: 2.9
   - Std Deviation: 0.970
   - Median: 1.45

UV Activity:
   Hours with UV > 0: 26 (92.9% of time)
   Hours with UV = 0: 2 (7.1% of time)

UV Intensity Distribution:

 Daily UV:
   Average daily UV ratio: 1.5%
   Most UV ratio: 2026-02-25 (2.9%)
   Least UV ratio: 2026-02-04 (0.0%)


In [47]:
print("=" * 60)
print("WIND SPEED STATISTICS")
print("=" * 60)

print(f"\nWIND:")
print(f"   - Average: {df['wind_speed'].mean():.1f} km/h")
print(f"   - Minimum gusts: {df['wind_speed'].min():.1f} km/h")
print(f"   - Maximum gusts: {df['wind_speed'].max():.1f} km/h")
print(f"   - Std Deviation: {df['wind_speed'].std():.1f} km/h")
print(f"   - Median: {df['wind_speed'].median():.1f} km/h")

# Wind categories (Beaufort scale approximation)
calm = len(df[df['wind_speed'] < 5])
light = len(df[(df['wind_speed'] >= 5) & (df['wind_speed'] < 20)])
moderate = len(df[(df['wind_speed'] >= 20) & (df['wind_speed'] < 40)])
strong = len(df[df['wind_speed'] >= 40])

print(f"\nWind Conditions:")
print(f"   Calm (<5 km/h):         {calm:4d} hours ({calm/total*100:5.1f}%)")
print(f"   Light (5-20 km/h):      {light:4d} hours ({light/total*100:5.1f}%)")
print(f"   Moderate (20-40 km/h):  {moderate:4d} hours ({moderate/total*100:5.1f}%)")
print(f"   Strong (>40 km/h):      {strong:4d} hours ({strong/total*100:5.1f}%)")

WIND SPEED STATISTICS

WIND:
   - Average: 4.6 km/h
   - Minimum gusts: 3.0 km/h
   - Maximum gusts: 6.0 km/h
   - Std Deviation: 0.9 km/h
   - Median: 5.0 km/h

Wind Conditions:
   Calm (<5 km/h):           12 hours ( 42.9%)
   Light (5-20 km/h):        16 hours ( 57.1%)
   Moderate (20-40 km/h):     0 hours (  0.0%)
   Strong (>40 km/h):         0 hours (  0.0%)


In [53]:
print("=" * 60)
print("SOLAR ANGLE STATISTICS")
print("=" * 60)

print(f"\nSOLAR ANGLE:")
print(f"   - Average: {df['wind_speed'].mean():.2f} km/h")
print(f"   - Minimum gusts: {df['solar_angle'].min():.2f} km/h")
print(f"   - Maximum gusts: {df['solar_angle'].max():.2f} km/h")
print(f"   - Std Deviation: {df['solar_angle'].std():.2f} km/h")
print(f"   - Median: {df['solar_angle'].median():.2f} km/h")

# Solar angle by hour (average)
hourly_angle = df.groupby('hour')['solar_angle'].mean()
print(f"\n Peak solar angle hour: {hourly_angle.idxmax()}:00 ({hourly_angle.max():.3f})")
print(f" Lowest solar angle hour: {hourly_angle.idxmin()}:00 ({hourly_angle.min():.3f})")

SOLAR ANGLE STATISTICS

SOLAR ANGLE:
   - Average: 4.64 km/h
   - Minimum gusts: 0.42 km/h
   - Maximum gusts: 0.81 km/h
   - Std Deviation: 0.12 km/h
   - Median: 0.69 km/h

 Peak solar angle hour: 15:00 (0.707)
 Lowest solar angle hour: 12:00 (0.647)


In [58]:
print("=" * 60)
print("SOLAR POTENTIAL STATISTICS (Raw Dataset)")
print("=" * 60)

print(f"\nSOLAR POTENTIAL:")
print(f"   - Average: {df['solar_potential'].mean():.2f}")
print(f"   - Minimum gusts: {df['solar_potential'].min():.2f}")
print(f"   - Maximum gusts: {df['solar_potential'].max():.2f}")
print(f"   - Std Deviation: {df['solar_potential'].std():.2f}")
print(f"   - Median: {df['solar_potential'].median():.2f} ")
print(f"   - Total: {df['solar_potential'].sum():.2f}")

# Hours with zero potential
zero_potential = len(df[df['solar_potential'] == 0])
print(f"\nZero Production Analysis:")
print(f"Hours with zero potential: {zero_potential} ({zero_potential/total*100:.2f}% of time)")

# Daily totals
daily_potential = df.groupby('date')['solar_potential'].sum()
print(f"\nDaily Solar Potential:")
print(f"   Average daily: {daily_potential.mean():.3f}")
print(f"   Best day: {daily_potential.idxmax()} ({daily_potential.max():.3f})")
print(f"   Worst day: {daily_potential.idxmin()} ({daily_potential.min():.3f})")

# Safe ratio calculation
if daily_potential.min() > 0:
    ratio = daily_potential.max() / daily_potential.min()
    print(f"   Ratio best/worst: {ratio:.1f}x")
else:
    print(f"   Ratio best/worst: Undefined (worst day has zero production)")
    
    # Alternative: compare best day to average
    if daily_potential.mean() > 0:
        print(f"   Best day vs average: {daily_potential.max()/daily_potential.mean():.1f}x average")

SOLAR POTENTIAL STATISTICS (Raw Dataset)

SOLAR POTENTIAL:
   - Average: 0.45
   - Minimum gusts: 0.00
   - Maximum gusts: 0.80
   - Std Deviation: 0.25
   - Median: 0.49 
   - Total: 12.61

Zero Production Analysis:
Hours with zero potential: 2 (7.14% of time)

Daily Solar Potential:
   Average daily: 0.450
   Best day: 2026-02-27 (0.800)
   Worst day: 2026-02-04 (0.000)
   Ratio best/worst: Undefined (worst day has zero production)
   Best day vs average: 1.8x average


In [59]:
# Cell 10: Worst-case analysis (for conservative estimates)
print("=" * 60)
print("WORST-CASE ANALYSIS")
print("=" * 60)

# Check if we have any non-zero solar potential
if df['solar_potential'].max() == 0:
    print("\nWARNING: All solar potential values are zero!")
    print("   This suggests there might be an issue with the data.")
    print("   Please check your CSV file.")
else:
    # Absolute worst hour
    worst_hour = df.loc[df['solar_potential'].idxmin()]
    print(f"\nAbsolute worst hour in dataset:")
    print(f"   Date: {worst_hour['date']} at {worst_hour['hour']}:00")
    print(f"   Solar potential: {worst_hour['solar_potential']:.3f}")
    print(f"   Conditions: {worst_hour['cloudcover']:.0f}% cloud cover, {worst_hour['temperature']:.1f}Â°C")
    print(f"   UV Index: {worst_hour['uv_index']}, Wind: {worst_hour['wind_speed']} km/h")

    # Worst day
    worst_day_idx = daily_potential.idxmin()
    worst_day_data = df[df['date'] == worst_day_idx]
    print(f"\nWorst day ({worst_day_idx}):")
    print(f"   Total potential: {daily_potential.min():.3f}")
    print(f"   Average cloud cover: {worst_day_data['cloudcover'].mean():.1f}%")
    print(f"   Average temperature: {worst_day_data['temperature'].mean():.1f}Â°C")
    print(f"   Hours with production: {len(worst_day_data[worst_day_data['solar_potential'] > 0])}")

    # Consecutive bad days (for battery sizing consideration)
    if daily_potential.mean() > 0:
        print(f"\nFor battery storage planning:")
        print(f"   Worst day is {(1 - daily_potential.min()/daily_potential.mean())*100:.0f}% below average")
        print(f"   You need to size for {daily_potential.min():.2f} worst-case daily production")

WORST-CASE ANALYSIS

Absolute worst hour in dataset:
   Date: 2026-02-04 at 12:00
   Solar potential: 0.000
   Conditions: 100% cloud cover, 3.5Â°C
   UV Index: 0.0, Wind: 6 km/h

Worst day (2026-02-04):
   Total potential: 0.000
   Average cloud cover: 100.0%
   Average temperature: 3.5Â°C
   Hours with production: 0

For battery storage planning:
   Worst day is 100% below average
   You need to size for 0.00 worst-case daily production


In [62]:
# Q1 ANALYSIS FINISHED

# PREPARING FOR EXPORTING ALL THE DATA TO Q2 - SEPARATED NOTEBOOKS

# ============================================
# SAVE ALL VARIABLES FOR Q2
# ============================================

# ============================================
# SAVE ALL VARIABLES FOR Q2
# ============================================

import os

print("\n" + "=" * 60)
print(" SAVING ALL VARIABLES FOR Q2")
print("=" * 60)

# DEFINE OUTPUT DIRECTORY FIRST
output_dir = "notebooks_output"
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
print(f" Output directory: {output_dir}")

# Get all local variables (excluding built-ins and imports)
all_variables = {}
for var_name in dir():
    # Skip built-in variables, modules, and private variables
    if not var_name.startswith('_') and var_name not in ['In', 'Out', 'exit', 'quit', 'get_ipython', 'pd', 'np', 'plt', 'sns', 'datetime', 'warnings', 'os']:
        try:
            all_variables[var_name] = eval(var_name)
        except:
            pass

print(f" Found {len(all_variables)} variables to save")

# Method 1: Save individual variables with %store
print("\nðŸ“¤ Saving individual variables with %store...")
saved_count = 0
for var_name in all_variables.keys():
    try:
        # Use %store command
        get_ipython().run_line_magic('store', var_name)
        saved_count += 1
        print(f"   {var_name}")
    except:
        print(f"   {var_name} (could not store)")

print(f"\n Saved {saved_count} individual variables with %store")

# Method 2: Save complete dictionary as backup
import pickle
backup_file = f"{output_dir}/q1_all_variables.pkl"
with open(backup_file, 'wb') as f:
    pickle.dump(all_variables, f)

print(f"   Complete backup saved to: {backup_file}")
print(f"   File size: {os.path.getsize(backup_file) / 1024:.1f} KB")

# Method 3: Also store the dictionary itself
%store all_variables

print("\n" + "=" * 60)
print(" ALL VARIABLES SAVED SUCCESSFULLY!")
print("=" * 60)

print("""
 TO ACCESS VARIABLES IN Q2:

   # Method A: Load individual variables (if you know what you need)
   %store -r df
   %store -r avg_temp
   %store -r daily_potential

   # Method B: Load the complete dictionary
   %store -r all_variables
   df = all_variables['df']
   avg_temp = all_variables['avg_temp']

   # Method C: Load everything automatically
   stored_vars = %store
   for var in [v.split(':')[0].strip() for v in stored_vars if 'df' in v or 'temp' in v or 'cloud' in v]:
       try:
           exec(f"%store -r {var}")
           print(f"Loaded: {var}")
       except:
           pass
""")


 SAVING ALL VARIABLES FOR Q2
 Output directory: notebooks_output
 Found 29 variables to save

ðŸ“¤ Saving individual variables with %store...
   all_variables (could not store)
Stored 'axes' (ndarray)
   axes
Stored 'calm' (int)
   calm
Stored 'clear_sky' (int)
   clear_sky
Stored 'cloudy' (int)
   cloudy
Stored 'daily_cloud' (Series)
   daily_cloud
Stored 'daily_potential' (Series)
   daily_potential
Stored 'daily_uv' (Series)
   daily_uv
Stored 'df' (DataFrame)
   df
Stored 'fig' (Figure)
   fig
Stored 'hourly_angle' (Series)
   hourly_angle
Stored 'light' (int)
   light
Stored 'moderate' (int)
   moderate
   open (could not store)
Stored 'output_dir' (str)
   output_dir
Stored 'p' (int)
   p
Stored 'partly_cloudy' (int)
   partly_cloudy
Stored 'percentiles' (list)
   percentiles
   pickle (could not store)
Stored 'saved_count' (int)
   saved_count
Stored 'strong' (int)
   strong
Stored 'total' (int)
   total
Stored 'uv_categories' (dict)
   uv_categories
Stored 'uv_positive' (int)


PicklingError: Can't pickle <function open at 0x000001BFDB510D60>: it's not the same object as _io.open