In [None]:
import pandas as pd

df = pd.read_csv('data/history.csv')
#print(df.head())
#print(df.columns)

#Specifying columns to check for zeros
cols_to_check = ["sunshine_duration (s)", "daylight_duration (s)", "shortwave_radiation_sum (MJ/m¬≤)"]

#Checking for zeros in the specified columns
rows_with_zero = df[(df[cols_to_check] == 0).any(axis=1)]

print(rows_with_zero)

             time  temperature_2m_mean (¬∞C)  temperature_2m_max (¬∞C)  \
143    1994-05-24                      26.0                     28.9   
178    1994-06-28                      27.0                     28.2   
494    1995-05-10                      27.1                     30.1   
497    1995-05-13                      26.2                     27.3   
500    1995-05-16                      26.1                     26.4   
...           ...                       ...                      ...   
11104  2024-05-27                      27.0                     27.8   
11170  2024-08-01                      26.9                     27.9   
11215  2024-09-15                      25.8                     26.9   
11255  2024-10-25                      25.1                     25.8   
11517  2025-07-14                      26.3                     27.8   

       temperature_2m_min (¬∞C)  sunshine_duration (s)  daylight_duration (s)  \
143                       24.4                    0.

In [11]:
import pandas as pd

# If you uncomment the header line in history.csv first, use this simpler version:
df = pd.read_csv('data/history.csv')

# Filter rows where sunshine_duration is 0.0
zero_sunshine_rows = df[df['sunshine_duration (s)'] == 0.0]

# Save to a new CSV file
zero_sunshine_rows.to_csv('data/zero_sunshine_days.csv', index=False)

print(f"Found {len(zero_sunshine_rows)} days with zero sunshine duration")
print(f"Results saved to 'data/zero_sunshine_days.csv'")
print("\nPreview:")
print(zero_sunshine_rows[['time', 'sunshine_duration (s)', 'temperature_2m_mean (¬∞C)']].head())

Found 172 days with zero sunshine duration
Results saved to 'data/zero_sunshine_days.csv'

Preview:
           time  sunshine_duration (s)  temperature_2m_mean (¬∞C)
143  1994-05-24                    0.0                      26.0
178  1994-06-28                    0.0                      27.0
494  1995-05-10                    0.0                      27.1
497  1995-05-13                    0.0                      26.2
500  1995-05-16                    0.0                      26.1


In [12]:
import pandas as pd
import requests
from datetime import datetime
import time
from tqdm import tqdm  # For progress bar

# Read the zero sunshine days
zero_sunshine_df = pd.read_csv('data/zero_sunshine_days.csv')

# Read the main history file
history_df = pd.read_csv('data/history.csv', comment='#', header=None, skiprows=1)
history_df.columns = ['time', 'temperature_2m_mean (¬∞C)', 'temperature_2m_max (¬∞C)', 
                      'temperature_2m_min (¬∞C)', 'sunshine_duration (s)', 
                      'daylight_duration (s)', 'shortwave_radiation_sum (MJ/m¬≤)']

# Ensure 'time' column is datetime
zero_sunshine_df['time'] = pd.to_datetime(zero_sunshine_df['time'])
history_df['time'] = pd.to_datetime(history_df['time'])

# Base API URL
base_url = "https://archive-api.open-meteo.com/v1/archive"

# Location coordinates
params = {
    'latitude': 22.5626,
    'longitude': 88.363,
    'daily': 'temperature_2m_mean,temperature_2m_max,temperature_2m_min,sunshine_duration,daylight_duration,shortwave_radiation_sum',
    'timezone': 'GMT'
}

# Function to fetch data with retry logic and exponential backoff
def fetch_api_data(start_date, end_date, max_retries=3):
    params_copy = params.copy()
    params_copy['start_date'] = start_date
    params_copy['end_date'] = end_date
    
    for attempt in range(max_retries):
        try:
            response = requests.get(base_url, params=params_copy, timeout=30)
            
            # Handle rate limiting
            if response.status_code == 429:
                wait_time = 2 ** attempt  # Exponential backoff
                print(f"  ‚ö†Ô∏è  Rate limited. Waiting {wait_time}s before retry...")
                time.sleep(wait_time)
                continue
            
            response.raise_for_status()
            data = response.json()
            
            # Convert to DataFrame
            df = pd.DataFrame({
                'time': pd.to_datetime(data['daily']['time']),
                'temperature_2m_mean (¬∞C)': data['daily']['temperature_2m_mean'],
                'temperature_2m_max (¬∞C)': data['daily']['temperature_2m_max'],
                'temperature_2m_min (¬∞C)': data['daily']['temperature_2m_min'],
                'sunshine_duration (s)': data['daily']['sunshine_duration'],
                'daylight_duration (s)': data['daily']['daylight_duration'],
                'shortwave_radiation_sum (MJ/m¬≤)': data['daily']['shortwave_radiation_sum']
            })
            
            return df
            
        except requests.exceptions.Timeout:
            print(f"  ‚ö†Ô∏è  Timeout on attempt {attempt + 1}/{max_retries}")
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)
        except Exception as e:
            print(f"  ‚ùå Error: {e}")
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)
    
    return None

# Optimize: Group by YEAR instead of month (fewer API calls)
# Open-Meteo allows up to 1 year of data per request
zero_sunshine_df['year'] = zero_sunshine_df['time'].dt.year
years = sorted(zero_sunshine_df['year'].unique())

updated_count = 0
corrections = []

print(f"üìä Processing {len(zero_sunshine_df)} rows across {len(years)} years")
print(f"üåê Making approximately {len(years)} API calls...\n")

for year in tqdm(years, desc="Fetching data by year"):
    # Get all dates for this year
    year_data = zero_sunshine_df[zero_sunshine_df['year'] == year]
    start_date = f"{year}-01-01"
    end_date = f"{year}-12-31"
    
    print(f"\nüìÖ Fetching year {year} ({len(year_data)} days to check)...")
    
    api_data = fetch_api_data(start_date, end_date)
    
    if api_data is not None:
        # Update history_df with API data for matching dates
        for date in year_data['time']:
            api_row = api_data[api_data['time'] == date]
            
            if not api_row.empty:
                api_row = api_row.iloc[0]
                hist_mask = history_df['time'] == date
                
                if hist_mask.any():
                    old_sunshine = history_df.loc[hist_mask, 'sunshine_duration (s)'].values[0]
                    new_sunshine = api_row['sunshine_duration (s)']
                    
                    # Only update if old value was 0 and new value is different
                    if old_sunshine == 0.0 and new_sunshine is not None:
                        # Update all columns
                        history_df.loc[hist_mask, 'sunshine_duration (s)'] = new_sunshine
                        history_df.loc[hist_mask, 'temperature_2m_mean (¬∞C)'] = api_row['temperature_2m_mean (¬∞C)']
                        history_df.loc[hist_mask, 'temperature_2m_max (¬∞C)'] = api_row['temperature_2m_max (¬∞C)']
                        history_df.loc[hist_mask, 'temperature_2m_min (¬∞C)'] = api_row['temperature_2m_min (¬∞C)']
                        history_df.loc[hist_mask, 'daylight_duration (s)'] = api_row['daylight_duration (s)']
                        history_df.loc[hist_mask, 'shortwave_radiation_sum (MJ/m¬≤)'] = api_row['shortwave_radiation_sum (MJ/m¬≤)']
                        
                        corrections.append({
                            'date': date.strftime('%Y-%m-%d'),
                            'old_sunshine': old_sunshine,
                            'new_sunshine': new_sunshine,
                            'difference': new_sunshine - old_sunshine
                        })
                        updated_count += 1
                        print(f"  ‚úÖ {date.strftime('%Y-%m-%d')}: {old_sunshine} ‚Üí {new_sunshine}")
    
    # Rate limit: Wait 1 second between requests (Open-Meteo free tier allows ~10,000 calls/day)
    if year != years[-1]:  # Don't wait after the last request
        time.sleep(1)

# Save results
print(f"\nüíæ Saving results...")
history_df.to_csv('data/history_corrected.csv', index=False)

if corrections:
    corrections_df = pd.DataFrame(corrections)
    corrections_df.to_csv('data/corrections_log.csv', index=False)
    
    print(f"\n‚úÖ SUCCESS!")
    print(f"üìù Updated {updated_count} rows")
    print(f"üìÇ Corrected history: data/history_corrected.csv")
    print(f"üìã Corrections log: data/corrections_log.csv")
    print(f"\nüìä Statistics:")
    print(f"   - Total sunshine added: {corrections_df['difference'].sum():.2f} seconds")
    print(f"   - Average correction: {corrections_df['difference'].mean():.2f} seconds")
else:
    print(f"\n‚ö†Ô∏è  No corrections needed - all zero values confirmed!")

üìä Processing 172 rows across 32 years
üåê Making approximately 32 API calls...



Fetching data by year:   0%|          | 0/32 [00:00<?, ?it/s]


üìÖ Fetching year 1994 (2 days to check)...
  ‚úÖ 1994-05-24: 0.0 ‚Üí 0.0
  ‚úÖ 1994-06-28: 0.0 ‚Üí 0.0


Fetching data by year:   3%|‚ñé         | 1/32 [00:03<01:38,  3.18s/it]


üìÖ Fetching year 1995 (7 days to check)...
  ‚úÖ 1995-05-10: 0.0 ‚Üí 0.0
  ‚úÖ 1995-05-13: 0.0 ‚Üí 0.0
  ‚úÖ 1995-05-16: 0.0 ‚Üí 0.0
  ‚úÖ 1995-07-19: 0.0 ‚Üí 0.0
  ‚úÖ 1995-10-30: 0.0 ‚Üí 0.0
  ‚úÖ 1995-11-01: 0.0 ‚Üí 0.0
  ‚úÖ 1995-11-09: 0.0 ‚Üí 0.0


Fetching data by year:   6%|‚ñã         | 2/32 [00:05<01:22,  2.75s/it]


üìÖ Fetching year 1996 (8 days to check)...
  ‚úÖ 1996-06-19: 0.0 ‚Üí 0.0
  ‚úÖ 1996-06-20: 0.0 ‚Üí 0.0
  ‚úÖ 1996-06-26: 0.0 ‚Üí 0.0
  ‚úÖ 1996-06-30: 0.0 ‚Üí 0.0
  ‚úÖ 1996-08-07: 0.0 ‚Üí 0.0
  ‚úÖ 1996-08-21: 0.0 ‚Üí 0.0
  ‚úÖ 1996-10-27: 0.0 ‚Üí 0.0
  ‚úÖ 1996-10-28: 0.0 ‚Üí 0.0


Fetching data by year:   9%|‚ñâ         | 3/32 [00:08<01:19,  2.74s/it]


üìÖ Fetching year 1997 (4 days to check)...
  ‚úÖ 1997-07-19: 0.0 ‚Üí 0.0
  ‚úÖ 1997-07-20: 0.0 ‚Üí 0.0
  ‚úÖ 1997-09-06: 0.0 ‚Üí 0.0
  ‚úÖ 1997-09-26: 0.0 ‚Üí 0.0


Fetching data by year:  12%|‚ñà‚ñé        | 4/32 [00:10<01:12,  2.57s/it]


üìÖ Fetching year 1998 (7 days to check)...
  ‚úÖ 1998-08-13: 0.0 ‚Üí 0.0
  ‚úÖ 1998-09-08: 0.0 ‚Üí 0.0
  ‚úÖ 1998-09-10: 0.0 ‚Üí 0.0
  ‚úÖ 1998-09-11: 0.0 ‚Üí 0.0
  ‚úÖ 1998-09-12: 0.0 ‚Üí 0.0
  ‚úÖ 1998-10-20: 0.0 ‚Üí 0.0
  ‚úÖ 1998-11-22: 0.0 ‚Üí 0.0


Fetching data by year:  16%|‚ñà‚ñå        | 5/32 [00:13<01:13,  2.71s/it]


üìÖ Fetching year 1999 (2 days to check)...
  ‚úÖ 1999-08-15: 0.0 ‚Üí 0.0
  ‚úÖ 1999-10-17: 0.0 ‚Üí 0.0


Fetching data by year:  19%|‚ñà‚ñâ        | 6/32 [00:16<01:13,  2.83s/it]


üìÖ Fetching year 2000 (2 days to check)...
  ‚úÖ 2000-08-31: 0.0 ‚Üí 0.0
  ‚úÖ 2000-09-18: 0.0 ‚Üí 0.0


Fetching data by year:  22%|‚ñà‚ñà‚ñè       | 7/32 [00:19<01:08,  2.72s/it]


üìÖ Fetching year 2001 (3 days to check)...
  ‚úÖ 2001-06-02: 0.0 ‚Üí 0.0
  ‚úÖ 2001-07-02: 0.0 ‚Üí 0.0
  ‚úÖ 2001-11-11: 0.0 ‚Üí 0.0


Fetching data by year:  25%|‚ñà‚ñà‚ñå       | 8/32 [00:21<01:02,  2.60s/it]


üìÖ Fetching year 2002 (5 days to check)...
  ‚úÖ 2002-06-09: 0.0 ‚Üí 0.0
  ‚úÖ 2002-06-24: 0.0 ‚Üí 0.0
  ‚úÖ 2002-09-10: 0.0 ‚Üí 0.0
  ‚úÖ 2002-11-11: 0.0 ‚Üí 0.0
  ‚úÖ 2002-11-12: 0.0 ‚Üí 0.0


Fetching data by year:  28%|‚ñà‚ñà‚ñä       | 9/32 [00:23<00:58,  2.53s/it]


üìÖ Fetching year 2003 (3 days to check)...
  ‚úÖ 2003-06-19: 0.0 ‚Üí 0.0
  ‚úÖ 2003-10-07: 0.0 ‚Üí 0.0
  ‚úÖ 2003-10-08: 0.0 ‚Üí 0.0


Fetching data by year:  31%|‚ñà‚ñà‚ñà‚ñè      | 10/32 [00:26<00:53,  2.42s/it]


üìÖ Fetching year 2004 (5 days to check)...
  ‚úÖ 2004-06-15: 0.0 ‚Üí 0.0
  ‚úÖ 2004-09-12: 0.0 ‚Üí 0.0
  ‚úÖ 2004-09-13: 0.0 ‚Üí 0.0
  ‚úÖ 2004-09-14: 0.0 ‚Üí 0.0
  ‚úÖ 2004-09-16: 0.0 ‚Üí 0.0


Fetching data by year:  34%|‚ñà‚ñà‚ñà‚ñç      | 11/32 [00:28<00:48,  2.33s/it]


üìÖ Fetching year 2005 (9 days to check)...
  ‚úÖ 2005-06-28: 0.0 ‚Üí 0.0
  ‚úÖ 2005-07-04: 0.0 ‚Üí 0.0
  ‚úÖ 2005-07-13: 0.0 ‚Üí 0.0
  ‚úÖ 2005-07-14: 0.0 ‚Üí 0.0
  ‚úÖ 2005-10-02: 0.0 ‚Üí 0.0
  ‚úÖ 2005-10-19: 0.0 ‚Üí 0.0
  ‚úÖ 2005-10-20: 0.0 ‚Üí 0.0
  ‚úÖ 2005-10-21: 0.0 ‚Üí 0.0
  ‚úÖ 2005-12-22: 0.0 ‚Üí 0.0


Fetching data by year:  38%|‚ñà‚ñà‚ñà‚ñä      | 12/32 [00:30<00:47,  2.39s/it]


üìÖ Fetching year 2006 (4 days to check)...
  ‚úÖ 2006-06-10: 0.0 ‚Üí 0.0
  ‚úÖ 2006-07-09: 0.0 ‚Üí 0.0
  ‚úÖ 2006-09-20: 0.0 ‚Üí 0.0
  ‚úÖ 2006-11-10: 0.0 ‚Üí 0.0


Fetching data by year:  41%|‚ñà‚ñà‚ñà‚ñà      | 13/32 [00:33<00:48,  2.56s/it]


üìÖ Fetching year 2007 (6 days to check)...
  ‚úÖ 2007-06-13: 0.0 ‚Üí 0.0
  ‚úÖ 2007-07-04: 0.0 ‚Üí 0.0
  ‚úÖ 2007-08-17: 0.0 ‚Üí 0.0
  ‚úÖ 2007-09-22: 0.0 ‚Üí 0.0
  ‚úÖ 2007-09-23: 0.0 ‚Üí 0.0
  ‚úÖ 2007-09-24: 0.0 ‚Üí 0.0


Fetching data by year:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 14/32 [00:36<00:49,  2.77s/it]


üìÖ Fetching year 2008 (5 days to check)...
  ‚úÖ 2008-06-17: 0.0 ‚Üí 0.0
  ‚úÖ 2008-06-19: 0.0 ‚Üí 0.0
  ‚úÖ 2008-09-17: 0.0 ‚Üí 0.0
  ‚úÖ 2008-10-24: 0.0 ‚Üí 0.0
  ‚úÖ 2008-10-26: 0.0 ‚Üí 0.0


Fetching data by year:  47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 15/32 [00:39<00:45,  2.66s/it]


üìÖ Fetching year 2009 (5 days to check)...
  ‚úÖ 2009-05-25: 0.0 ‚Üí 0.0
  ‚úÖ 2009-09-05: 0.0 ‚Üí 0.0
  ‚úÖ 2009-09-06: 0.0 ‚Üí 0.0
  ‚úÖ 2009-09-08: 0.0 ‚Üí 0.0
  ‚úÖ 2009-09-09: 0.0 ‚Üí 0.0


Fetching data by year:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 16/32 [00:41<00:40,  2.50s/it]


üìÖ Fetching year 2010 (4 days to check)...
  ‚úÖ 2010-10-06: 0.0 ‚Üí 0.0
  ‚úÖ 2010-10-07: 0.0 ‚Üí 0.0
  ‚úÖ 2010-10-08: 0.0 ‚Üí 0.0
  ‚úÖ 2010-12-06: 0.0 ‚Üí 0.0


Fetching data by year:  53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 17/32 [00:43<00:37,  2.51s/it]


üìÖ Fetching year 2011 (11 days to check)...
  ‚úÖ 2011-06-17: 0.0 ‚Üí 0.0
  ‚úÖ 2011-06-18: 0.0 ‚Üí 0.0
  ‚úÖ 2011-06-26: 0.0 ‚Üí 0.0
  ‚úÖ 2011-07-03: 0.0 ‚Üí 0.0
  ‚úÖ 2011-07-21: 0.0 ‚Üí 0.0
  ‚úÖ 2011-08-07: 0.0 ‚Üí 0.0
  ‚úÖ 2011-08-08: 0.0 ‚Üí 0.0
  ‚úÖ 2011-08-09: 0.0 ‚Üí 0.0
  ‚úÖ 2011-08-10: 0.0 ‚Üí 0.0
  ‚úÖ 2011-08-18: 0.0 ‚Üí 0.0
  ‚úÖ 2011-09-01: 0.0 ‚Üí 0.0


Fetching data by year:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 18/32 [00:46<00:36,  2.58s/it]


üìÖ Fetching year 2012 (4 days to check)...
  ‚úÖ 2012-06-23: 0.0 ‚Üí 0.0
  ‚úÖ 2012-06-24: 0.0 ‚Üí 0.0
  ‚úÖ 2012-09-14: 0.0 ‚Üí 0.0
  ‚úÖ 2012-11-03: 0.0 ‚Üí 0.0


Fetching data by year:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 19/32 [00:49<00:33,  2.61s/it]


üìÖ Fetching year 2013 (6 days to check)...
  ‚úÖ 2013-06-10: 0.0 ‚Üí 0.0
  ‚úÖ 2013-06-30: 0.0 ‚Üí 0.0
  ‚úÖ 2013-07-28: 0.0 ‚Üí 0.0
  ‚úÖ 2013-08-27: 0.0 ‚Üí 0.0
  ‚úÖ 2013-10-12: 0.0 ‚Üí 0.0
  ‚úÖ 2013-10-25: 0.0 ‚Üí 0.0


Fetching data by year:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 20/32 [00:51<00:29,  2.50s/it]


üìÖ Fetching year 2014 (6 days to check)...
  ‚úÖ 2014-06-21: 0.0 ‚Üí 0.0
  ‚úÖ 2014-07-01: 0.0 ‚Üí 0.0
  ‚úÖ 2014-07-03: 0.0 ‚Üí 0.0
  ‚úÖ 2014-08-15: 0.0 ‚Üí 0.0
  ‚úÖ 2014-09-20: 0.0 ‚Üí 0.0
  ‚úÖ 2014-10-27: 0.0 ‚Üí 0.0


Fetching data by year:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 21/32 [00:53<00:26,  2.42s/it]


üìÖ Fetching year 2015 (11 days to check)...
  ‚úÖ 2015-01-01: 0.0 ‚Üí 0.0
  ‚úÖ 2015-06-25: 0.0 ‚Üí 0.0
  ‚úÖ 2015-06-26: 0.0 ‚Üí 0.0
  ‚úÖ 2015-06-27: 0.0 ‚Üí 0.0
  ‚úÖ 2015-07-10: 0.0 ‚Üí 0.0
  ‚úÖ 2015-07-25: 0.0 ‚Üí 0.0
  ‚úÖ 2015-07-27: 0.0 ‚Üí 0.0
  ‚úÖ 2015-07-29: 0.0 ‚Üí 0.0
  ‚úÖ 2015-08-02: 0.0 ‚Üí 0.0
  ‚úÖ 2015-09-23: 0.0 ‚Üí 0.0
  ‚úÖ 2015-12-18: 0.0 ‚Üí 0.0


Fetching data by year:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 22/32 [00:56<00:24,  2.41s/it]


üìÖ Fetching year 2016 (7 days to check)...
  ‚úÖ 2016-05-20: 0.0 ‚Üí 0.0
  ‚úÖ 2016-07-16: 0.0 ‚Üí 0.0
  ‚úÖ 2016-07-17: 0.0 ‚Üí 0.0
  ‚úÖ 2016-08-06: 0.0 ‚Üí 0.0
  ‚úÖ 2016-08-08: 0.0 ‚Üí 0.0
  ‚úÖ 2016-08-21: 0.0 ‚Üí 0.0
  ‚úÖ 2016-09-06: 0.0 ‚Üí 0.0


Fetching data by year:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 23/32 [00:58<00:22,  2.48s/it]


üìÖ Fetching year 2017 (13 days to check)...
  ‚úÖ 2017-06-12: 0.0 ‚Üí 0.0
  ‚úÖ 2017-06-20: 0.0 ‚Üí 0.0
  ‚úÖ 2017-07-22: 0.0 ‚Üí 0.0
  ‚úÖ 2017-07-23: 0.0 ‚Üí 0.0
  ‚úÖ 2017-07-24: 0.0 ‚Üí 0.0
  ‚úÖ 2017-07-25: 0.0 ‚Üí 0.0
  ‚úÖ 2017-10-08: 0.0 ‚Üí 0.0
  ‚úÖ 2017-10-09: 0.0 ‚Üí 0.0
  ‚úÖ 2017-10-20: 0.0 ‚Üí 0.0
  ‚úÖ 2017-11-15: 0.0 ‚Üí 0.0
  ‚úÖ 2017-12-08: 0.0 ‚Üí 0.0
  ‚úÖ 2017-12-09: 0.0 ‚Üí 0.0
  ‚úÖ 2017-12-10: 0.0 ‚Üí 0.0


Fetching data by year:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 24/32 [01:01<00:20,  2.61s/it]


üìÖ Fetching year 2018 (2 days to check)...
  ‚úÖ 2018-07-03: 0.0 ‚Üí 0.0
  ‚úÖ 2018-12-16: 0.0 ‚Üí 0.0


Fetching data by year:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 25/32 [01:04<00:19,  2.72s/it]


üìÖ Fetching year 2019 (5 days to check)...
  ‚úÖ 2019-09-25: 0.0 ‚Üí 0.0
  ‚úÖ 2019-10-24: 0.0 ‚Üí 0.0
  ‚úÖ 2019-10-25: 0.0 ‚Üí 0.0
  ‚úÖ 2019-11-08: 0.0 ‚Üí 0.0
  ‚úÖ 2019-11-09: 0.0 ‚Üí 0.0


Fetching data by year:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 26/32 [01:06<00:15,  2.56s/it]


üìÖ Fetching year 2020 (4 days to check)...
  ‚úÖ 2020-05-20: 0.0 ‚Üí 0.0
  ‚úÖ 2020-06-17: 0.0 ‚Üí 0.0
  ‚úÖ 2020-08-04: 0.0 ‚Üí 0.0
  ‚úÖ 2020-09-01: 0.0 ‚Üí 0.0


Fetching data by year:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 27/32 [01:09<00:13,  2.64s/it]


üìÖ Fetching year 2021 (10 days to check)...
  ‚úÖ 2021-06-17: 0.0 ‚Üí 0.0
  ‚úÖ 2021-06-18: 0.0 ‚Üí 0.0
  ‚úÖ 2021-09-19: 0.0 ‚Üí 0.0
  ‚úÖ 2021-09-20: 0.0 ‚Üí 0.0
  ‚úÖ 2021-09-21: 0.0 ‚Üí 0.0
  ‚úÖ 2021-10-18: 0.0 ‚Üí 0.0
  ‚úÖ 2021-10-19: 0.0 ‚Üí 0.0
  ‚úÖ 2021-12-04: 0.0 ‚Üí 0.0
  ‚úÖ 2021-12-05: 0.0 ‚Üí 0.0
  ‚úÖ 2021-12-06: 0.0 ‚Üí 0.0


Fetching data by year:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 28/32 [01:12<00:10,  2.62s/it]


üìÖ Fetching year 2022 (5 days to check)...
  ‚úÖ 2022-08-14: 0.0 ‚Üí 0.0
  ‚úÖ 2022-09-11: 0.0 ‚Üí 0.0
  ‚úÖ 2022-09-12: 0.0 ‚Üí 0.0
  ‚úÖ 2022-09-14: 0.0 ‚Üí 0.0
  ‚úÖ 2022-10-24: 0.0 ‚Üí 0.0


Fetching data by year:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 29/32 [01:14<00:07,  2.50s/it]


üìÖ Fetching year 2023 (1 days to check)...
  ‚úÖ 2023-11-16: 0.0 ‚Üí 0.0


Fetching data by year:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 30/32 [01:16<00:04,  2.43s/it]


üìÖ Fetching year 2024 (5 days to check)...
  ‚úÖ 2024-05-26: 0.0 ‚Üí 0.0
  ‚úÖ 2024-05-27: 0.0 ‚Üí 0.0
  ‚úÖ 2024-08-01: 0.0 ‚Üí 0.0
  ‚úÖ 2024-09-15: 0.0 ‚Üí 0.0
  ‚úÖ 2024-10-25: 0.0 ‚Üí 0.0


Fetching data by year:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 31/32 [01:19<00:02,  2.47s/it]


üìÖ Fetching year 2025 (1 days to check)...
  ‚ùå Error: 400 Client Error: Bad Request for url: https://archive-api.open-meteo.com/v1/archive?latitude=22.5626&longitude=88.363&daily=temperature_2m_mean%2Ctemperature_2m_max%2Ctemperature_2m_min%2Csunshine_duration%2Cdaylight_duration%2Cshortwave_radiation_sum&timezone=GMT&start_date=2025-01-01&end_date=2025-12-31
  ‚ùå Error: 400 Client Error: Bad Request for url: https://archive-api.open-meteo.com/v1/archive?latitude=22.5626&longitude=88.363&daily=temperature_2m_mean%2Ctemperature_2m_max%2Ctemperature_2m_min%2Csunshine_duration%2Cdaylight_duration%2Cshortwave_radiation_sum&timezone=GMT&start_date=2025-01-01&end_date=2025-12-31


Fetching data by year: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 32/32 [01:27<00:00,  2.72s/it]

  ‚ùå Error: 400 Client Error: Bad Request for url: https://archive-api.open-meteo.com/v1/archive?latitude=22.5626&longitude=88.363&daily=temperature_2m_mean%2Ctemperature_2m_max%2Ctemperature_2m_min%2Csunshine_duration%2Cdaylight_duration%2Cshortwave_radiation_sum&timezone=GMT&start_date=2025-01-01&end_date=2025-12-31

üíæ Saving results...






‚úÖ SUCCESS!
üìù Updated 171 rows
üìÇ Corrected history: data/history_corrected.csv
üìã Corrections log: data/corrections_log.csv

üìä Statistics:
   - Total sunshine added: 0.00 seconds
   - Average correction: 0.00 seconds


In [13]:
import pandas as pd
import requests
from datetime import datetime
import time

# Read the zero sunshine days
zero_sunshine_df = pd.read_csv('data/zero_sunshine_days.csv')

# Read the main history file
history_df = pd.read_csv('data/history.csv', comment='#', header=None, skiprows=1)
history_df.columns = ['time', 'temperature_2m_mean (¬∞C)', 'temperature_2m_max (¬∞C)', 
                      'temperature_2m_min (¬∞C)', 'sunshine_duration (s)', 
                      'daylight_duration (s)', 'shortwave_radiation_sum (MJ/m¬≤)']

# Ensure 'time' column is datetime
zero_sunshine_df['time'] = pd.to_datetime(zero_sunshine_df['time'])

# Group dates into ranges for API efficiency (API allows date ranges)
dates_to_check = zero_sunshine_df['time'].dt.date.tolist()

# Base API URL
base_url = "https://archive-api.open-meteo.com/v1/archive"

# Location coordinates
params = {
    'latitude': 22.5626,
    'longitude': 88.363,
    'daily': 'temperature_2m_mean,temperature_2m_max,temperature_2m_min,sunshine_duration,daylight_duration,shortwave_radiation_sum',
    'timezone': 'GMT'
}

# Function to fetch data for a date range
def fetch_api_data(start_date, end_date):
    params['start_date'] = start_date
    params['end_date'] = end_date
    
    try:
        response = requests.get(base_url, params=params)
        response.raise_for_status()
        data = response.json()
        
        # Convert to DataFrame
        df = pd.DataFrame({
            'time': data['daily']['time'],
            'temperature_2m_mean (¬∞C)': data['daily']['temperature_2m_mean'],
            'temperature_2m_max (¬∞C)': data['daily']['temperature_2m_max'],
            'temperature_2m_min (¬∞C)': data['daily']['temperature_2m_min'],
            'sunshine_duration (s)': data['daily']['sunshine_duration'],
            'daylight_duration (s)': data['daily']['daylight_duration'],
            'shortwave_radiation_sum (MJ/m¬≤)': data['daily']['shortwave_radiation_sum']
        })
        
        return df
    except Exception as e:
        print(f"Error fetching data for {start_date} to {end_date}: {e}")
        return None

# Process dates in monthly chunks to minimize API calls
updated_count = 0
corrections = []

# Group by year-month
zero_sunshine_df['year_month'] = zero_sunshine_df['time'].dt.to_period('M')
grouped = zero_sunshine_df.groupby('year_month')

for period, group in grouped:
    start_date = group['time'].min().strftime('%Y-%m-%d')
    end_date = group['time'].max().strftime('%Y-%m-%d')
    
    print(f"Fetching data for {start_date} to {end_date}...")
    
    api_data = fetch_api_data(start_date, end_date)
    
    if api_data is not None:
        api_data['time'] = pd.to_datetime(api_data['time'])
        
        # Update history_df with API data for these dates
        for idx, row in api_data.iterrows():
            date = row['time']
            
            # Find matching row in history_df
            mask = pd.to_datetime(history_df['time']) == date
            
            if mask.any():
                old_sunshine = history_df.loc[mask, 'sunshine_duration (s)'].values[0]
                new_sunshine = row['sunshine_duration (s)']
                
                if old_sunshine == 0.0 and new_sunshine != 0.0:
                    # Update the values
                    history_df.loc[mask, 'sunshine_duration (s)'] = new_sunshine
                    history_df.loc[mask, 'temperature_2m_mean (¬∞C)'] = row['temperature_2m_mean (¬∞C)']
                    history_df.loc[mask, 'temperature_2m_max (¬∞C)'] = row['temperature_2m_max (¬∞C)']
                    history_df.loc[mask, 'temperature_2m_min (¬∞C)'] = row['temperature_2m_min (¬∞C)']
                    history_df.loc[mask, 'daylight_duration (s)'] = row['daylight_duration (s)']
                    history_df.loc[mask, 'shortwave_radiation_sum (MJ/m¬≤)'] = row['shortwave_radiation_sum (MJ/m¬≤)']
                    
                    corrections.append({
                        'date': date.strftime('%Y-%m-%d'),
                        'old_sunshine': old_sunshine,
                        'new_sunshine': new_sunshine
                    })
                    updated_count += 1
                    print(f"  Updated {date.strftime('%Y-%m-%d')}: {old_sunshine} ‚Üí {new_sunshine}")
    
    # Be respectful to the API - add a small delay
    time.sleep(0.5)

# Save corrected history
history_df.to_csv('data/history_corrected.csv', index=False)

# Save correction log
if corrections:
    corrections_df = pd.DataFrame(corrections)
    corrections_df.to_csv('data/corrections_log.csv', index=False)

print(f"\n‚úÖ Correction complete!")
print(f"Updated {updated_count} rows")
print(f"Corrected history saved to: data/history_corrected.csv")
print(f"Corrections log saved to: data/corrections_log.csv")

Fetching data for 1994-05-24 to 1994-05-24...
Fetching data for 1994-06-28 to 1994-06-28...
Fetching data for 1995-05-10 to 1995-05-16...
Fetching data for 1995-07-19 to 1995-07-19...
Fetching data for 1995-10-30 to 1995-10-30...
Fetching data for 1995-11-01 to 1995-11-09...
Fetching data for 1996-06-19 to 1996-06-30...
Fetching data for 1996-08-07 to 1996-08-21...
Fetching data for 1996-10-27 to 1996-10-28...
Fetching data for 1997-07-19 to 1997-07-20...
Fetching data for 1997-09-06 to 1997-09-26...
Fetching data for 1998-08-13 to 1998-08-13...
Fetching data for 1998-09-08 to 1998-09-12...
Fetching data for 1998-10-20 to 1998-10-20...
Fetching data for 1998-11-22 to 1998-11-22...
Fetching data for 1999-08-15 to 1999-08-15...
Fetching data for 1999-10-17 to 1999-10-17...
Fetching data for 2000-08-31 to 2000-08-31...
Fetching data for 2000-09-18 to 2000-09-18...
Fetching data for 2001-06-02 to 2001-06-02...
Fetching data for 2001-07-02 to 2001-07-02...
Fetching data for 2001-11-11 to 20

In [14]:
import pandas as pd
import requests

# Read the zero sunshine days
zero_sunshine_df = pd.read_csv('data/zero_sunshine_days.csv')

# Get the first date with zero sunshine
test_date = zero_sunshine_df['time'].iloc[0]
print(f"üîç Testing with date: {test_date}")

# Read your current history data for comparison
history_df = pd.read_csv('data/history.csv', comment='#', header=None, skiprows=1)
history_df.columns = ['time', 'temperature_2m_mean (¬∞C)', 'temperature_2m_max (¬∞C)', 
                      'temperature_2m_min (¬∞C)', 'sunshine_duration (s)', 
                      'daylight_duration (s)', 'shortwave_radiation_sum (MJ/m¬≤)']

# Get the existing values for this date
existing_row = history_df[history_df['time'] == test_date]

print("\nüìä YOUR CURRENT DATA:")
print(existing_row.to_string(index=False))

# Fetch from API
api_url = "https://archive-api.open-meteo.com/v1/archive"
params = {
    'latitude': 22.5626,
    'longitude': 88.363,
    'start_date': test_date,
    'end_date': test_date,
    'daily': 'temperature_2m_mean,temperature_2m_max,temperature_2m_min,sunshine_duration,daylight_duration,shortwave_radiation_sum',
    'timezone': 'GMT'
}

try:
    response = requests.get(api_url, params=params)
    response.raise_for_status()
    data = response.json()
    
    print("\nüåê API DATA:")
    print(f"Date: {data['daily']['time'][0]}")
    print(f"Temperature Mean: {data['daily']['temperature_2m_mean'][0]} ¬∞C")
    print(f"Temperature Max: {data['daily']['temperature_2m_max'][0]} ¬∞C")
    print(f"Temperature Min: {data['daily']['temperature_2m_min'][0]} ¬∞C")
    print(f"Sunshine Duration: {data['daily']['sunshine_duration'][0]} s")
    print(f"Daylight Duration: {data['daily']['daylight_duration'][0]} s")
    print(f"Shortwave Radiation: {data['daily']['shortwave_radiation_sum'][0]} MJ/m¬≤")
    
    # Compare
    print("\nüîÑ COMPARISON:")
    your_sunshine = existing_row['sunshine_duration (s)'].values[0] if not existing_row.empty else None
    api_sunshine = data['daily']['sunshine_duration'][0]
    
    print(f"Your sunshine_duration: {your_sunshine} s")
    print(f"API sunshine_duration: {api_sunshine} s")
    
    if your_sunshine == 0.0 and api_sunshine != 0.0:
        print(f"‚úÖ CHANGE DETECTED! API has non-zero value: {api_sunshine} s")
    elif your_sunshine == api_sunshine:
        print(f"‚ö†Ô∏è  NO CHANGE - Both have same value: {your_sunshine} s")
    else:
        print(f"üìù Different values found")
        
except Exception as e:
    print(f"‚ùå Error fetching API data: {e}")

üîç Testing with date: 1994-05-24

üìä YOUR CURRENT DATA:
      time  temperature_2m_mean (¬∞C)  temperature_2m_max (¬∞C)  temperature_2m_min (¬∞C)  sunshine_duration (s)  daylight_duration (s)  shortwave_radiation_sum (MJ/m¬≤)
1994-05-24                      26.0                     28.9                     24.4                    0.0               48016.72                             5.98

üåê API DATA:
Date: 1994-05-24
Temperature Mean: 26.0 ¬∞C
Temperature Max: 28.9 ¬∞C
Temperature Min: 24.4 ¬∞C
Sunshine Duration: 0.0 s
Daylight Duration: 48016.72 s
Shortwave Radiation: 5.98 MJ/m¬≤

üîÑ COMPARISON:
Your sunshine_duration: 0.0 s
API sunshine_duration: 0.0 s
‚ö†Ô∏è  NO CHANGE - Both have same value: 0.0 s


In [16]:
# Rename columns (modify keys if needed)
df = df.rename(columns={
    "time": "date",
    "temperature_2m_mean": "temp_mean",
    "temperature_2m_max": "temp_max",
    "temperature_2m_min": "temp_min"
})

print(df.head())


         date  temperature_2m_mean (¬∞C)  temperature_2m_max (¬∞C)  \
0  1994-01-01                      20.0                     27.2   
1  1994-01-02                      20.0                     27.1   
2  1994-01-03                      20.7                     28.2   
3  1994-01-04                      21.0                     27.7   
4  1994-01-05                      21.6                     28.0   

   temperature_2m_min (¬∞C)  sunshine_duration (s)  daylight_duration (s)  \
0                     13.9               35388.50               38813.64   
1                     13.4               35337.72               38834.01   
2                     14.2               35348.36               38856.03   
3                     14.4               35392.11               38879.66   
4                     14.7               35311.84               38904.88   

   shortwave_radiation_sum (MJ/m¬≤)  
0                            16.70  
1                            16.64  
2                  