In [1]:
import requests
import json
from datetime import datetime, timedelta
import pandas as pd

In [2]:
def fetch_nasa_data(year, latitude, longitude, timeout=60):
    """Download one calendar year of daily NASA POWER point data.

    Queries the ``temporal/daily/point`` endpoint for 1 January through
    31 December of ``year`` at the given coordinates and returns the decoded
    JSON response.

    Args:
        year: Calendar year used to build the ``YYYY0101``/``YYYY1231`` range.
        latitude: Latitude sent as the ``latitude`` query parameter.
        longitude: Longitude sent as the ``longitude`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    url = "https://power.larc.nasa.gov/api/temporal/daily/point"
    params = {
        'parameters': 'T2M,T2M_MAX,T2M_MIN,RH2M,WS2M,ALLSKY_SFC_SW_DWN,ALLSKY_SFC_LW_DWN,PS,PRECTOTCORR',
        'community': 'RE',
        'longitude': longitude,
        'latitude': latitude,
        'start': f"{year}0101",
        'end': f"{year}1231",
        'format': 'JSON'
    }

    response = requests.get(url, params=params, timeout=timeout)
    # Fail here with the HTTP status instead of letting an error payload
    # surface later as an opaque KeyError while parsing.
    response.raise_for_status()
    return response.json()


In [3]:
def parse_nasa_data(json_data):
    """Convert a NASA POWER daily JSON response into a tidy DataFrame.

    Args:
        json_data: Decoded response whose ``['properties']['parameter']``
            entry maps each parameter name to a ``{YYYYMMDD: value}`` dict.
            An optional ``['header']['fill_value']`` gives the sentinel the
            service uses for missing observations.

    Returns:
        A DataFrame with a ``date`` column, one column per weather parameter
        (in the order listed below) and the derived ``month``, ``day`` and
        ``day_of_year`` columns. Values equal to the fill value are returned
        as NaN so that ``isnull()``/``dropna()`` can detect them.

    Raises:
        KeyError: If an expected parameter or date is absent from the payload.
    """
    value_columns = [
        'T2M', 'T2M_MAX', 'T2M_MIN', 'RH2M', 'WS2M',
        'ALLSKY_SFC_SW_DWN', 'ALLSKY_SFC_LW_DWN', 'PS', 'PRECTOTCORR',
    ]

    p = json_data['properties']['parameter']
    # T2M's keys define the set of days; every other parameter is read for
    # exactly those days.
    dates = list(p['T2M'].keys())

    columns = {'date': pd.to_datetime(dates, format='%Y%m%d')}
    for name in value_columns:
        columns[name] = [p[name][d] for d in dates]
    df = pd.DataFrame(columns)

    # Missing observations arrive as a numeric sentinel (-999 unless the
    # header says otherwise); turn them into NaN so they are not mistaken
    # for real measurements.
    fill_value = json_data.get('header', {}).get('fill_value', -999)
    df[value_columns] = df[value_columns].mask(df[value_columns].eq(fill_value))

    df['month'] = df['date'].dt.month
    df['day'] = df['date'].dt.day
    df['day_of_year'] = df['date'].dt.dayofyear

    return df


In [4]:
def collect_weather_data(start_year, end_year, lat=19.0760, lon=72.8777):
    """Fetch and combine daily weather data for an inclusive range of years.

    Each year is downloaded with ``fetch_nasa_data`` and converted with
    ``parse_nasa_data``; a progress line is printed before every request.

    Args:
        start_year: First year to download.
        end_year: Last year to download (inclusive).
        lat: Latitude passed to ``fetch_nasa_data``.
        lon: Longitude passed to ``fetch_nasa_data``.

    Returns:
        One DataFrame holding all years in order with a fresh 0..n-1 index,
        or an empty DataFrame when ``start_year > end_year``.
    """
    yearly_frames = []
    for year in range(start_year, end_year + 1):
        print(f"Fetching data for {year}...")
        data = fetch_nasa_data(year, lat, lon)
        yearly_frames.append(parse_nasa_data(data))

    # pd.concat rejects an empty list, so keep returning an empty frame
    # for an empty year range.
    if not yearly_frames:
        return pd.DataFrame()

    # Concatenate once: re-concatenating inside the loop copies every
    # previously collected row on each iteration.
    return pd.concat(yearly_frames, ignore_index=True)


In [5]:
# Download 2000 through 2024 with the function's default coordinates and
# discard any row that contains a null value.
df = collect_weather_data(2000, 2024).dropna()
df.head()


Fetching data for 2000...
Fetching data for 2001...
Fetching data for 2002...
Fetching data for 2003...
Fetching data for 2004...
Fetching data for 2005...
Fetching data for 2006...
Fetching data for 2007...
Fetching data for 2008...
Fetching data for 2009...
Fetching data for 2010...
Fetching data for 2011...
Fetching data for 2012...
Fetching data for 2013...
Fetching data for 2014...
Fetching data for 2015...
Fetching data for 2016...
Fetching data for 2017...
Fetching data for 2018...
Fetching data for 2019...
Fetching data for 2020...
Fetching data for 2021...
Fetching data for 2022...
Fetching data for 2023...
Fetching data for 2024...


Unnamed: 0,date,T2M,T2M_MAX,T2M_MIN,RH2M,WS2M,ALLSKY_SFC_SW_DWN,ALLSKY_SFC_LW_DWN,PS,PRECTOTCORR,month,day,day_of_year
0,2000-01-01,23.31,32.23,16.1,52.92,2.06,4.8254,8.7187,100.0,0.0,1,1,1
1,2000-01-02,23.13,32.05,16.43,49.59,2.49,4.8842,8.7247,100.09,0.0,1,2,2
2,2000-01-03,22.47,32.2,15.27,47.21,2.33,4.89,8.6647,100.02,0.0,1,3,3
3,2000-01-04,22.52,32.46,15.23,46.71,2.29,4.8806,8.61,99.98,0.0,1,4,4
4,2000-01-05,22.14,31.99,15.53,45.44,2.09,4.9529,8.5634,100.0,0.0,1,5,5


In [6]:
# Report the number of null entries found in each column of the DataFrame
print("Missing values per column:", df.isnull().sum(), sep="\n")


Missing values per column:
date                 0
T2M                  0
T2M_MAX              0
T2M_MIN              0
RH2M                 0
WS2M                 0
ALLSKY_SFC_SW_DWN    0
ALLSKY_SFC_LW_DWN    0
PS                   0
PRECTOTCORR          0
month                0
day                  0
day_of_year          0
dtype: int64


In [7]:
# Make sure 'date' holds datetime values before using the .dt accessor
df['date'] = pd.to_datetime(df['date'])

# Calendar features derived from 'date'; 'weekday' is the only new column,
# the other three overwrite columns that already exist
df['month'], df['day'] = df['date'].dt.month, df['date'].dt.day
df['weekday'], df['day_of_year'] = df['date'].dt.weekday, df['date'].dt.dayofyear

# Optional: season lookup using the Northern Hemisphere meteorological split
def get_season(month):
    """Return the season name for a month number.

    December-February map to 'Winter', March-May to 'Spring' and
    June-August to 'Summer'; every other value yields 'Fall'.
    """
    season_months = (
        ((12, 1, 2), 'Winter'),
        ((3, 4, 5), 'Spring'),
        ((6, 7, 8), 'Summer'),
    )
    for months, label in season_months:
        if month in months:
            return label
    return 'Fall'

# Label every row with the season that corresponds to its month number
df['season'] = df['month'].map(get_season)

df.head()


Unnamed: 0,date,T2M,T2M_MAX,T2M_MIN,RH2M,WS2M,ALLSKY_SFC_SW_DWN,ALLSKY_SFC_LW_DWN,PS,PRECTOTCORR,month,day,day_of_year,weekday,season
0,2000-01-01,23.31,32.23,16.1,52.92,2.06,4.8254,8.7187,100.0,0.0,1,1,1,5,Winter
1,2000-01-02,23.13,32.05,16.43,49.59,2.49,4.8842,8.7247,100.09,0.0,1,2,2,6,Winter
2,2000-01-03,22.47,32.2,15.27,47.21,2.33,4.89,8.6647,100.02,0.0,1,3,3,0,Winter
3,2000-01-04,22.52,32.46,15.23,46.71,2.29,4.8806,8.61,99.98,0.0,1,4,4,1,Winter
4,2000-01-05,22.14,31.99,15.53,45.44,2.09,4.9529,8.5634,100.0,0.0,1,5,5,2,Winter


In [8]:
# Swap the API parameter codes and lowercase feature names for descriptive
# column labels. 'date' is not in the mapping and keeps its name.
# NOTE(review): ALLSKY_SFC_LW_DWN looks like a longwave flux rather than a
# solar one -- confirm that "Solar_Radiation_LW" is the intended label.
df = df.rename(columns=dict([
    ("T2M", "Temperature_Avg"),
    ("T2M_MAX", "Temperature_Max"),
    ("T2M_MIN", "Temperature_Min"),
    ("RH2M", "Relative_Humidity"),
    ("WS2M", "Wind_Speed"),
    ("ALLSKY_SFC_SW_DWN", "Solar_Radiation_SW"),
    ("ALLSKY_SFC_LW_DWN", "Solar_Radiation_LW"),
    ("PS", "Pressure"),
    ("PRECTOTCORR", "Rainfall_mm"),
    ("month", "Month"),
    ("day", "Day"),
    ("day_of_year", "Day_Of_Year"),
    ("weekday", "Weekday"),
    ("season", "Season"),
]))

In [9]:
# Preview the first five rows of the DataFrame
df.head(n=5)

Unnamed: 0,date,Temperature_Avg,Temperature_Max,Temperature_Min,Relative_Humidity,Wind_Speed,Solar_Radiation_SW,Solar_Radiation_LW,Pressure,Rainfall_mm,Month,Day,Day_Of_Year,Weekday,Season
0,2000-01-01,23.31,32.23,16.1,52.92,2.06,4.8254,8.7187,100.0,0.0,1,1,1,5,Winter
1,2000-01-02,23.13,32.05,16.43,49.59,2.49,4.8842,8.7247,100.09,0.0,1,2,2,6,Winter
2,2000-01-03,22.47,32.2,15.27,47.21,2.33,4.89,8.6647,100.02,0.0,1,3,3,0,Winter
3,2000-01-04,22.52,32.46,15.23,46.71,2.29,4.8806,8.61,99.98,0.0,1,4,4,1,Winter
4,2000-01-05,22.14,31.99,15.53,45.44,2.09,4.9529,8.5634,100.0,0.0,1,5,5,2,Winter


In [11]:

# Write the DataFrame to CSV, leaving the row index out of the file
df.to_csv(path_or_buf='mumbai_weather_data.csv', index=False)
