In [2]:
# Import required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Set numpy print options to avoid scientific notation
np.set_printoptions(suppress=True, precision=2)

# Load and preprocess datasets
file_paths = {
    "1993-2002": './NOAA_data_Madison/madison_1993-2002_cleaned.csv',
    "2003-2013": './NOAA_data_Madison/madison_2003-2013_cleaned.csv',
    "2014-2024": './NOAA_data_Madison/madison_2014-2024_cleaned.csv'
}

# Load datasets into dataframes
df_train = pd.concat([pd.read_csv(file_paths["1993-2002"]), pd.read_csv(file_paths["2003-2013"])])
df_validate_test = pd.read_csv(file_paths["2014-2024"])

# Feature engineering: Convert DATE to numerical features
for df in [df_train, df_validate_test]:
    df['DATE'] = pd.to_datetime(df['DATE'])
    df['Year'] = df['DATE'].dt.year
    df['Month'] = df['DATE'].dt.month
    df['Day'] = df['DATE'].dt.day
    df.drop(columns=['DATE'], inplace=True)

# Split validation and testing data from 2014-2024 data
df_validation = df_validate_test[(df_validate_test['Year'] >= 2014) & (df_validate_test['Year'] < 2019)]
df_testing = df_validate_test[(df_validate_test['Year'] >= 2019) & (df_validate_test['Year'] <= 2024)]

# Define target columns and handle missing columns dynamically
target_columns = ['PRCP', 'SNOW', 'TMAX', 'TMIN', 'TOBS', 'TSUN']

def ensure_columns(df, target_cols):
    """Ensure all target columns are present in the dataframe."""
    for col in target_cols:
        if col not in df.columns:
            df[col] = 0  # Fill missing columns with zeros (or use imputation)
    return df

df_train = ensure_columns(df_train, target_columns)
df_validation = ensure_columns(df_validation, target_columns)
df_testing = ensure_columns(df_testing, target_columns)

# Define features (X) and target variables (y)
def split_features_and_target(df):
    X = df[['Year', 'Month', 'Day']]
    y = df[target_columns]
    return X, y

X_train, y_train = split_features_and_target(df_train)
X_validation, y_validation = split_features_and_target(df_validation)
X_test, y_test = split_features_and_target(df_testing)

# Scale the data
scaler_X = StandardScaler()
X_train_scaled = scaler_X.fit_transform(X_train)
X_validation_scaled = scaler_X.transform(X_validation)
X_test_scaled = scaler_X.transform(X_test)

scaler_y = StandardScaler()
y_train_scaled = scaler_y.fit_transform(y_train)
y_validation_scaled = scaler_y.transform(y_validation)
y_test_scaled = scaler_y.transform(y_test)

# Build the neural network
model = Sequential([
    Dense(64, activation='relu', input_dim=X_train_scaled.shape[1]),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(y_train_scaled.shape[1], activation='linear')
])

# Compile the model
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Set up early stopping
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train the model
history = model.fit(
    X_train_scaled, y_train_scaled,
    validation_data=(X_validation_scaled, y_validation_scaled),
    epochs=100,
    batch_size=16,
    callbacks=[early_stop],
    verbose=1
)

# Evaluate the model
loss, mae = model.evaluate(X_test_scaled, y_test_scaled, verbose=0)
print(f"Test Loss: {loss:.4f}, Test MAE: {mae:.4f}")



Epoch 1/100


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = 0  # Fill missing columns with zeros (or use imputation)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = 0  # Fill missing columns with zeros (or use imputation)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m480/480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.8276 - mae: 0.5960 - val_loss: nan - val_mae: nan
Epoch 2/100
[1m480/480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.5897 - mae: 0.4491 - val_loss: nan - val_mae: nan
Epoch 3/100
[1m480/480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.5307 - mae: 0.4189 - val_loss: nan - val_mae: nan
Epoch 4/100
[1m480/480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.5407 - mae: 0.4163 - val_loss: nan - val_mae: nan
Epoch 5/100
[1m480/480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.5222 - mae: 0.4033 - val_loss: nan - val_mae: nan
Epoch 6/100
[1m480/480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.5435 - mae: 0.4036 - val_loss: nan - val_mae: nan
Epoch 7/100
[1m480/480[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.5350 - mae: 0.4053 - val_loss: na

In [3]:
# Save the model
model.save('weather_model.h5')

from joblib import dump
dump(scaler_X, 'scaler_X.pkl')
dump(scaler_y, 'scaler_y.pkl')



['scaler_y.pkl']

In [None]:
# Predict on new data
user_year = int(input("Enter year: "))
user_month = int(input("Enter month: "))
user_day = int(input("Enter day: "))

new_date = pd.DataFrame({'Year': [user_year], 'Month': [user_month], 'Day': [user_day]})
new_date_scaled = scaler_X.transform(new_date)
prediction_scaled = model.predict(new_date_scaled)
prediction = scaler_y.inverse_transform(prediction_scaled)[0]

# Print the predicted values with descriptive labels
factors = ['PRCP', 'SNOW', 'TMAX', 'TMIN', 'TOBS', 'TSUN']
predicted_values = {factor: float(value) for factor, value in zip(factors, prediction)}

print(f"Predicted values for {new_date.iloc[0].values}:")
for factor, value in predicted_values.items():
    print(f"{factor}: {value:.2f}")