In [1]:
import requests
import pandas as pd

def get_climatic_data(lat, lng, date):
    """
    Fetches daily climatic data for a specific coordinate and date.
    'date' should be a string in 'YYYY-MM-DD' format or a datetime object.
    """
    # Ensure date is in string format for the API
    date_str = pd.to_datetime(date).strftime('%Y-%m-%d')
    
    url = "https://archive-api.open-meteo.com/v1/era5"
    
    params = {
        "latitude": lat,
        "longitude": lng,
        "start_date": date_str,
        "end_date": date_str,
        "daily": (
            "temperature_2m_mean,"
            "temperature_2m_max,"
            "dewpoint_2m_mean,"
            "relative_humidity_2m_mean,"
            "wind_speed_10m_mean,"
            "wind_gusts_10m_max,"
            "precipitation_sum,"
            "shortwave_radiation_sum"
        ),
        "timezone": "Asia/Kolkata"
    }

    try:
        response = requests.get(url, params=params)
        response.raise_for_status() # Check for request errors
        data = response.json()
        
        # Extract the 'daily' dictionary (contains lists of 1 element since we requested 1 day)
        daily_data = data.get("daily", {})
        
        # Clean up: Return a simple dictionary instead of lists of length 1
        result = {key: value[0] for key, value in daily_data.items()}
        return result

    except Exception as e:
        print(f"Error fetching data for {lat}, {lng} on {date_str}: {e}")
        return None

# --- Example Usage ---
# lat_val = 16.50  # Andhra Pradesh
# lng_val = 80.64
# target_date = "2023-05-15"

# weather_info = get_climatic_data(lat_val, lng_val, target_date)
# print(weather_info)

In [2]:
print(get_climatic_data(16.50, 80.64,'2020-01-01'))

{'time': '2020-01-01', 'temperature_2m_mean': 24.5, 'temperature_2m_max': 28.4, 'dewpoint_2m_mean': 20.2, 'relative_humidity_2m_mean': 77, 'wind_speed_10m_mean': 13.2, 'wind_gusts_10m_max': 40.3, 'precipitation_sum': 1.4, 'shortwave_radiation_sum': 12.28}


In [3]:
import pandas as pd
import requests

def get_nasa_power_weather(lat, lon, date):
    # NASA uses YYYYMMDD format
    date_str = pd.to_datetime(date).strftime('%Y%m%d')
    
    # Parameters: T2M (Temp), RH2M (Humidity), ALLSKY_SFC_SW_DWN (Solar Radiation)
    url = (f"https://power.larc.nasa.gov/api/temporal/daily/point?"
           f"parameters=T2M_MAX,T2M_MIN,RH2M,ALLSKY_SFC_SW_DWN,PRECTOTCORR&"
           f"community=RE&longitude={lon}&latitude={lat}&"
           f"start={date_str}&end={date_str}&format=JSON")
    
    response = requests.get(url).json()
    # Extract data from the complex NASA JSON structure
    data = response['properties']['parameter']
    
    return {
        "temp_max": data['T2M_MAX'][date_str],
        "humidity": data['RH2M'][date_str],
        "solar_radiation": data['ALLSKY_SFC_SW_DWN'][date_str],
        "precipitation": data['PRECTOTCORR'][date_str]
    }

In [4]:
print(get_nasa_power_weather(16.50, 80.64,'2020-01-01'))

{'temp_max': 30.6, 'humidity': 78.1, 'solar_radiation': 4.0243, 'precipitation': 0.15}


In [6]:
import pandas as pd
import requests
import time
from urllib.parse import urlencode

# ==========================================
# STEP 1: LOAD & CALCULATE 'ACTUAL DRAWL'
# ==========================================
file_path = 'India_Elec_data_(Jan2020-Mar2025).csv'
print(f"Loading {file_path}...")
df = pd.read_csv(file_path)

# Clean column names
df.columns = df.columns.str.strip()

# Rename tricky columns
df.rename(columns={
    'Drawl Schedule': 'Drawl_Schedule',
    'OD(+) / UD(-)': 'OD_UD',
    'Max Demand Met': 'Max_Demand_Met',
    'Shortage During Peak': 'Peak_Shortage'
}, inplace=True)

# Calculate Actual_Drawl (Schedule + OD/UD)
df['Drawl_Schedule'] = pd.to_numeric(df['Drawl_Schedule'], errors='coerce').fillna(0)
df['OD_UD'] = pd.to_numeric(df['OD_UD'], errors='coerce').fillna(0)
df['Actual_Drawl'] = df['Drawl_Schedule'] + df['OD_UD']

# Ensure Date is datetime
df['Date'] = pd.to_datetime(df['Date'])

# ==========================================
# STEP 2: CITY MAPPING & COORDINATES
# ==========================================
city_map = {
    'HP': 'Shimla', 'Telangana': 'Hyderabad', 'Punjab': 'Ludhiana', 'NR UP': 'Lucknow',
    'MP': 'Bhopal', 'West Bengal': 'Kolkata', 'DD': 'Daman', 'SR Karnataka': 'Bengaluru',
    'Delhi': 'New Delhi', 'Kerala': 'Thiruvananthapuram', 'Haryana': 'Chandigarh',
    'Gujarat': 'Ahmedabad', 'Chhattisgarh': 'Raipur', 'ER Odisha': 'Bhubaneswar',
    'Andhra Pradesh': 'Vijayawada', 'Tamil Nadu': 'Chennai', 'Bihar': 'Patna',
    'WR Maharashtra': 'Mumbai', 'Assam': 'Guwahati', 'Rajasthan': 'Jaipur',
    'Essar steel': 'Hazira', 'Uttarakhand': 'Dehradun', 'DVC': 'Kolkata',
    'NER Meghalaya': 'Shillong', 'J&K(UT) & Ladakh(UT)': 'Srinagar', 'Chandigarh': 'Chandigarh',
    'Puducherry': 'Puducherry', 'Tripura': 'Agartala', 'DNH': 'Silvassa', 'Jharkhand': 'Ranchi',
    'Goa': 'Panaji', 'Mizoram': 'Aizawl', 'Nagaland': 'Kohima', 'Manipur': 'Imphal',
    'Arunachal Pradesh': 'Itanagar', 'Sikkim': 'Gangtok'
}

def get_lat_long(state_name):
    search_query = city_map.get(state_name, state_name)
    params = {
       'api_key': 'AGbFAKx58hyjQScCXIYrxuEwJh2W2cmv',
       's': search_query, 'stack': 'aws', 'locale': 'en', 'filter': 'international',
       'place-types': 'settlement,airport,district', 'order': 'importance', 'a': 'true', 'format': 'json'
    }
    try:
        url = 'https://locator-service.api.bbci.co.uk/locations?' + urlencode(params)
        result = requests.get(url).json()
        components = result['response']['results']['results'][0]
        return components['latitude'], components['longitude']
    except Exception:
        return None, None

# ==========================================
# STEP 3: BULK WEATHER FETCHING (UPDATED)
# ==========================================
def fetch_bulk_weather_custom(lat, lng, start_date, end_date):
    """
    Fetches FULL range using the specific parameters you requested.
    """
    s_str = start_date.strftime('%Y-%m-%d')
    e_str = end_date.strftime('%Y-%m-%d')
    s_nasa = start_date.strftime('%Y%m%d')
    e_nasa = end_date.strftime('%Y%m%d')

    # --- 1. Open-Meteo (All requested columns) ---
    try:
        om_url = "https://archive-api.open-meteo.com/v1/era5"
        # Using exact parameters from your snippet
        om_params = {
            "latitude": lat, "longitude": lng, 
            "start_date": s_str, "end_date": e_str, 
            "daily": "temperature_2m_mean,temperature_2m_max,dewpoint_2m_mean,relative_humidity_2m_mean,wind_speed_10m_mean,wind_gusts_10m_max,precipitation_sum,shortwave_radiation_sum",
            "timezone": "Asia/Kolkata"
        }
        om_res = requests.get(om_url, params=om_params).json()
        
        # Create DataFrame with prefix 'om_' to avoid collisions
        df_om = pd.DataFrame({
            'Date': pd.to_datetime(om_res['daily']['time']),
            'om_temp_mean': om_res['daily']['temperature_2m_mean'],
            'om_temp_max': om_res['daily']['temperature_2m_max'],
            'om_dewpoint': om_res['daily']['dewpoint_2m_mean'],
            'om_humidity': om_res['daily']['relative_humidity_2m_mean'],
            'om_wind_speed': om_res['daily']['wind_speed_10m_mean'],
            'om_wind_gusts': om_res['daily']['wind_gusts_10m_max'],
            'om_precip': om_res['daily']['precipitation_sum'],
            'om_solar': om_res['daily']['shortwave_radiation_sum']
        })
    except Exception as e:
        print(f"Open-Meteo Error: {e}")
        df_om = pd.DataFrame()

    # --- 2. NASA POWER (All requested columns) ---
    try:
        # Using exact parameters from your snippet: T2M_MAX, RH2M, ALLSKY_SFC_SW_DWN, PRECTOTCORR
        nasa_url = f"https://power.larc.nasa.gov/api/temporal/daily/point?parameters=T2M_MAX,RH2M,ALLSKY_SFC_SW_DWN,PRECTOTCORR&community=RE&longitude={lng}&latitude={lat}&start={s_nasa}&end={e_nasa}&format=JSON"
        nasa_res = requests.get(nasa_url).json()['properties']['parameter']
        
        df_nasa = pd.DataFrame({
            'Date': pd.to_datetime(list(nasa_res['T2M_MAX'].keys())),
            'nasa_temp_max': list(nasa_res['T2M_MAX'].values()),
            'nasa_humidity': list(nasa_res['RH2M'].values()),
            'nasa_solar': list(nasa_res['ALLSKY_SFC_SW_DWN'].values()),
            'nasa_precip': list(nasa_res['PRECTOTCORR'].values())
        })
    except Exception as e:
        print(f"NASA Error: {e}")
        df_nasa = pd.DataFrame()

    # Merge logic
    if df_om.empty and df_nasa.empty: return pd.DataFrame()
    elif df_om.empty: return df_nasa
    elif df_nasa.empty: return df_om
    else: return df_om.merge(df_nasa, on='Date', how='outer')

# ==========================================
# STEP 4: EXECUTION
# ==========================================
all_weather_data = []
unique_states = df['State'].unique()
start_dt, end_dt = df['Date'].min(), df['Date'].max()

print(f"Fetching extended weather variables for {len(unique_states)} states...")

for state in unique_states:
    lat, lng = get_lat_long(state)
    if lat and lng:
        print(f"  Fetching: {state}")
        # Call the new custom function
        sw = fetch_bulk_weather_custom(lat, lng, start_dt, end_dt)
        if not sw.empty:
            sw['State'] = state
            all_weather_data.append(sw)
        time.sleep(1) # Be nice to the API
    else:
        print(f"  Skipping: {state} (No coords)")

# Final Merge
if all_weather_data:
    full_weather_df = pd.concat(all_weather_data)
    final_df = df.merge(full_weather_df, on=['Date', 'State'], how='left', suffixes=('', '_drop'))
    final_df = final_df[[c for c in final_df.columns if not c.endswith('_drop')]]
    
    # Calculate Gap
    final_df['Gap'] = final_df['Actual_Drawl'] - final_df['Drawl_Schedule']
    
    # Save
    final_df.to_csv('Master_Weather_Electricity_Data.csv', index=False)
    print("\nSuccess! Master file saved with all Open-Meteo and NASA columns.")
    print("Columns added: om_temp_mean, om_wind_gusts, nasa_solar, etc.")
else:
    print("Failed to fetch weather data.")

Loading India_Elec_data_(Jan2020-Mar2025).csv...
Fetching extended weather variables for 36 states...
  Fetching: Andhra Pradesh
  Skipping: Arunachal Pradesh (No coords)
  Fetching: Assam
  Fetching: Bihar
  Fetching: Chandigarh
  Skipping: Chhattisgarh (No coords)
  Skipping: DD (No coords)
  Skipping: DNH (No coords)
  Fetching: DVC
  Fetching: Delhi
  Skipping: ER Odisha (No coords)
  Skipping: Essar steel (No coords)
  Skipping: Goa (No coords)
  Fetching: Gujarat
  Fetching: HP
  Fetching: Haryana
  Fetching: J&K(UT) & Ladakh(UT)
  Fetching: Jharkhand
  Fetching: Kerala
  Fetching: MP
  Fetching: Manipur
Open-Meteo Error: 'daily'
  Fetching: Mizoram
Open-Meteo Error: 'daily'
  Fetching: NER Meghalaya
Open-Meteo Error: 'daily'
  Fetching: NR UP
Open-Meteo Error: 'daily'
  Skipping: Nagaland (No coords)
  Fetching: Puducherry
Open-Meteo Error: 'daily'
  Fetching: Punjab
Open-Meteo Error: 'daily'
  Fetching: Rajasthan
  Skipping: SR Karnataka (No coords)
  Skipping: Sikkim (No coord

In [7]:
import pandas as pd
import requests
import time

# ==========================================
# 1. LOAD EXISTING MASTER FILE
# ==========================================
file_path = 'Master_Weather_Electricity_Data.csv'
print(f"Loading {file_path} to patch missing states...")
df = pd.read_csv(file_path)
df['Date'] = pd.to_datetime(df['Date'])

# ==========================================
# 2. DEFINE MANUAL COORDINATES FOR FAILED STATES
# ==========================================
# These are the specific states that failed in your run
FAILED_STATES = {
    # --- No Coords Errors ---
    'Arunachal Pradesh': (27.08, 93.60),
    'Chhattisgarh': (21.27, 81.63),
    'DD': (20.41, 72.83),  # Daman
    'DNH': (20.26, 73.00), # Silvassa
    'ER Odisha': (20.29, 85.82), # Bhubaneswar
    'Essar steel': (21.08, 72.63), # Hazira
    'Goa': (15.49, 73.82),
    'Nagaland': (25.67, 94.10),
    'SR Karnataka': (12.97, 77.59), # Bengaluru
    'Sikkim': (27.33, 88.61),
    'Uttarakhand': (30.31, 78.03),
    
    # --- Open-Meteo Errors (Likely Rate Limit/Coords) ---
    'Manipur': (24.81, 93.93),
    'Mizoram': (23.73, 92.71),
    'NER Meghalaya': (25.57, 91.88),
    'NR UP': (26.84, 80.95), # Lucknow
    'Puducherry': (11.94, 79.80),
    'Punjab': (30.73, 76.77)
}

# ==========================================
# 3. WEATHER FETCHING FUNCTION
# ==========================================
def fetch_weather_patch(lat, lng, start_date, end_date):
    s_str = start_date.strftime('%Y-%m-%d')
    e_str = end_date.strftime('%Y-%m-%d')
    s_nasa = start_date.strftime('%Y%m%d')
    e_nasa = end_date.strftime('%Y%m%d')

    # API 1: Open-Meteo
    try:
        om_url = "https://archive-api.open-meteo.com/v1/era5"
        om_params = {
            "latitude": lat, "longitude": lng, "start_date": s_str, "end_date": e_str, 
            "daily": "temperature_2m_mean,temperature_2m_max,dewpoint_2m_mean,relative_humidity_2m_mean,wind_speed_10m_mean,wind_gusts_10m_max,precipitation_sum,shortwave_radiation_sum",
            "timezone": "Asia/Kolkata"
        }
        # Increased timeout and added delay
        om_res = requests.get(om_url, params=om_params, timeout=20).json()
        df_om = pd.DataFrame({
            'Date': pd.to_datetime(om_res['daily']['time']),
            'om_temp_mean': om_res['daily']['temperature_2m_mean'],
            'om_temp_max': om_res['daily']['temperature_2m_max'],
            'om_dewpoint': om_res['daily']['dewpoint_2m_mean'],
            'om_humidity': om_res['daily']['relative_humidity_2m_mean'],
            'om_wind_speed': om_res['daily']['wind_speed_10m_mean'],
            'om_wind_gusts': om_res['daily']['wind_gusts_10m_max'],
            'om_precip': om_res['daily']['precipitation_sum'],
            'om_solar': om_res['daily']['shortwave_radiation_sum']
        })
    except Exception as e:
        print(f"  > Open-Meteo failed: {e}")
        df_om = pd.DataFrame()

    # API 2: NASA POWER
    try:
        nasa_url = f"https://power.larc.nasa.gov/api/temporal/daily/point?parameters=T2M_MAX,RH2M,ALLSKY_SFC_SW_DWN,PRECTOTCORR&community=RE&longitude={lng}&latitude={lat}&start={s_nasa}&end={e_nasa}&format=JSON"
        nasa_res = requests.get(nasa_url, timeout=20).json()['properties']['parameter']
        df_nasa = pd.DataFrame({
            'Date': pd.to_datetime(list(nasa_res['T2M_MAX'].keys())),
            'nasa_temp_max': list(nasa_res['T2M_MAX'].values()),
            'nasa_humidity': list(nasa_res['RH2M'].values()),
            'nasa_solar': list(nasa_res['ALLSKY_SFC_SW_DWN'].values()),
            'nasa_precip': list(nasa_res['PRECTOTCORR'].values())
        })
    except Exception as e:
        print(f"  > NASA failed: {e}")
        df_nasa = pd.DataFrame()

    if df_om.empty and df_nasa.empty: return pd.DataFrame()
    elif df_om.empty: return df_nasa
    elif df_nasa.empty: return df_om
    else: return df_om.merge(df_nasa, on='Date', how='outer')

# ==========================================
# 4. EXECUTION LOOP
# ==========================================
start_dt, end_dt = df['Date'].min(), df['Date'].max()
weather_cols = [c for c in df.columns if 'om_' in c or 'nasa_' in c]
cleaned_rows = []

print(f"Patching {len(FAILED_STATES)} states...")

for state, coords in FAILED_STATES.items():
    lat, lng = coords
    print(f"Fixing: {state} ... ", end="")
    
    # Fetch new weather
    new_weather = fetch_weather_patch(lat, lng, start_dt, end_dt)
    
    if not new_weather.empty:
        # Get the rows for this state from original DF
        state_df = df[df['State'] == state].copy()
        
        # Drop old broken weather columns if they exist
        state_df = state_df.drop(columns=[c for c in weather_cols if c in state_df.columns], errors='ignore')
        
        # Merge new weather
        merged_state = state_df.merge(new_weather, on='Date', how='left')
        cleaned_rows.append(merged_state)
        print("Done.")
    else:
        # If it still fails, keep original rows
        cleaned_rows.append(df[df['State'] == state])
        print("Failed again (Keeping original).")
    
    time.sleep(2) # Generous delay to prevent errors

# ==========================================
# 5. RECOMBINE AND SAVE
# ==========================================
# Keep states that were ALREADY good
good_states = df[~df['State'].isin(FAILED_STATES.keys())]

# Combine good states + fixed states
if cleaned_rows:
    fixed_df = pd.concat(cleaned_rows)
    final_df = pd.concat([good_states, fixed_df], axis=0)
    
    # Sort nicely
    final_df = final_df.sort_values(['State', 'Date'])
    
    final_df.to_csv('Master_Weather_Electricity_Data_FIXED.csv', index=False)
    print("\n------------------------------------------------")
    print("COMPLETE! Fixed file saved as: 'Master_Weather_Electricity_Data_FIXED.csv'")
    print(f"Total Rows: {len(final_df)}")
else:
    print("No updates were made.")

Loading Master_Weather_Electricity_Data.csv to patch missing states...
Patching 17 states...
Fixing: Arunachal Pradesh ... Done.
Fixing: Chhattisgarh ... Done.
Fixing: DD ... Done.
Fixing: DNH ... Done.
Fixing: ER Odisha ... Done.
Fixing: Essar steel ...   > NASA failed: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Done.
Fixing: Goa ... Done.
Fixing: Nagaland ... Done.
Fixing: SR Karnataka ... Done.
Fixing: Sikkim ... Done.
Fixing: Uttarakhand ... Done.
Fixing: Manipur ... Done.
Fixing: Mizoram ...   > Open-Meteo failed: 'daily'
Done.
Fixing: NER Meghalaya ...   > Open-Meteo failed: 'daily'
Done.
Fixing: NR UP ... Done.
Fixing: Puducherry ... Done.
Fixing: Punjab ... Done.

------------------------------------------------
COMPLETE! Fixed file saved as: 'Master_Weather_Electricity_Data_FIXED.csv'
Total Rows: 69012
