In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.neural_network import MLPRegressor
from sklearn.impute import SimpleImputer

In [None]:
# Read the weather CSV into a DataFrame and preview its first rows.
csv_path = 'NepalWeatherData_labels.csv'
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Parse the 'DATE' strings (month/day/four-digit-year) into datetimes and
# store them back in the same column.
parsed_dates = pd.to_datetime(df['DATE'], format='%m/%d/%Y')
df['DATE'] = parsed_dates

# Define a function to categorize dates into seasons
def date_to_season(DATE):
    """Return the season name for the month of a date-like object.

    Parameters
    ----------
    DATE : any object exposing a ``month`` attribute
        Only ``DATE.month`` is read.

    Returns
    -------
    str
        'Winter' for months 12, 1, 2; 'Spring' for 3, 4, 5; 'Summer' for
        6, 7, 8; 'Fall' for every other value of ``month``.
    """
    month = DATE.month
    season_months = (
        ((12, 1, 2), 'Winter'),
        ((3, 4, 5), 'Spring'),
        ((6, 7, 8), 'Summer'),
    )
    for months, season in season_months:
        if month in months:
            return season
    # Anything not matched above falls through to the default season.
    return 'Fall'

# Derive a 'Season' column from each row's date via date_to_season, then
# discard the raw 'DATE' column, which is not used as a feature.
df = (
    df
    .assign(Season=df['DATE'].apply(date_to_season))
    .drop(columns=['DATE'])
)

In [None]:
# Encode categorical features as integer codes.
# Each column gets its OWN LabelEncoder: refitting a single shared encoder on
# the second column would overwrite the classes learned for the first, making
# it impossible to map DISTRICT codes back to district names later.
# The fitted encoders are kept in `label_encoders`, keyed by column name.
label_encoders = {}
for column in ['DISTRICT', 'Season']:
    label_encoder = LabelEncoder()
    df[column] = label_encoder.fit_transform(df[column])
    label_encoders[column] = label_encoder
# After the loop `label_encoder` is the encoder fitted on 'Season', which is
# the same state the name had when one encoder was reused for both columns.


In [None]:
# Model inputs: column names used as predictors, grouped by name prefix.
# Order matters: it fixes the column order of the feature matrix.
features = [
    'DISTRICT', 'LAT', 'LON',
    'QV2M', 'T2MWET',
    'T2M_MAX', 'T2M_MIN', 'T2M_RANGE',
    'TS',
    'WS10M', 'WS10M_MAX', 'WS10M_MIN', 'WS10M_RANGE',
    'WS50M', 'WS50M_MAX', 'WS50M_MIN', 'WS50M_RANGE',
    'Season',
]

# Model outputs, in order: temperature, humidity, air pressure, rainfall.
targets = ['T2M', 'RH2M', 'PS', 'PRECTOT']

In [None]:
# Split the data into training (80%) and testing (20%) sets.
# random_state is fixed so the shuffle, and therefore every metric computed
# from this split, is identical after a kernel restart and full re-run.
X = df[features]
y = df[targets]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Standardise the features. The scaler's statistics are learned from the
# training split only and then applied unchanged to both splits, so no
# information from the test split is used when fitting.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Fill missing feature values with the per-column mean.
# The imputer is fitted on the scaled TRAINING data; the same fitted imputer
# is what later code must use to transform the test data.
# Other available strategies include 'median' and 'most_frequent'.
imputer = SimpleImputer(strategy='mean').fit(X_train_scaled)
X_train_imputed = imputer.transform(X_train_scaled)


In [None]:
# Train one neural-network regressor per target variable and keep each
# fitted model in `models`, keyed by target column name.
# random_state is fixed so weight initialisation and batch shuffling are
# reproducible across runs.
models = {}
for target in targets:
    model = MLPRegressor(
        hidden_layer_sizes=(100, 50),
        activation='relu',
        max_iter=1000,
        random_state=42,
    )
    # y_train[target] is a single column, so it is already 1-D.
    model.fit(X_train_imputed, y_train[target].to_numpy())
    models[target] = model

In [None]:
 # Predict using the trained models and evaluate
# The evaluation statements must run once per target inside a loop: left
# indented with no enclosing `for`, the cell raises IndentationError, and
# relying on the leftover `model`/`target` names would score only the last
# trained model.

# Impute the test features once, with the imputer fitted on the training data.
X_test_imputed = imputer.transform(X_test_scaled)

# `models` preserves insertion order, so targets are reported in the order
# they were trained.
for target, model in models.items():
    predictions = model.predict(X_test_imputed)
    mse = mean_squared_error(y_test[target], predictions)
    r2 = r2_score(y_test[target], predictions)
    print(f"Target: {target}")
    print(f"Mean Squared Error: {mse}")
    print(f"R-squared: {r2}")
    print("\n")


Target: T2M
Mean Squared Error: 0.035741082457297915
R-squared: 0.999581119745507


Target: RH2M
Mean Squared Error: 0.5985083308533503
R-squared: 0.9988581817895394


Target: PS
Mean Squared Error: 0.04824556225149117
R-squared: 0.9995820993863681


Target: PRECTOT
Mean Squared Error: 21.74224312827813
R-squared: 0.41808709092990937


