In [3]:
import pandas as pd  #importing libraries
import numpy as np

In [4]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense

In [5]:
import seaborn as sns
import matplotlib.pyplot as plt

In [6]:
df = pd.read_csv("weatherAUS.csv")

In [7]:
df.head()

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,...,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow
0,2008-12-01,Albury,13.4,22.9,0.6,,,W,44.0,W,...,71.0,22.0,1007.7,1007.1,8.0,,16.9,21.8,No,No
1,2008-12-02,Albury,7.4,25.1,0.0,,,WNW,44.0,NNW,...,44.0,25.0,1010.6,1007.8,,,17.2,24.3,No,No
2,2008-12-03,Albury,12.9,25.7,0.0,,,WSW,46.0,W,...,38.0,30.0,1007.6,1008.7,,2.0,21.0,23.2,No,No
3,2008-12-04,Albury,9.2,28.0,0.0,,,NE,24.0,SE,...,45.0,16.0,1017.6,1012.8,,,18.1,26.5,No,No
4,2008-12-05,Albury,17.5,32.3,1.0,,,W,41.0,ENE,...,82.0,33.0,1010.8,1006.0,7.0,8.0,17.8,29.7,No,No


In [8]:
df['Date'] = pd.to_datetime(df['Date'])

In [9]:
df.dtypes

Date             datetime64[ns]
Location                 object
MinTemp                 float64
MaxTemp                 float64
Rainfall                float64
Evaporation             float64
Sunshine                float64
WindGustDir              object
WindGustSpeed           float64
WindDir9am               object
WindDir3pm               object
WindSpeed9am            float64
WindSpeed3pm            float64
Humidity9am             float64
Humidity3pm             float64
Pressure9am             float64
Pressure3pm             float64
Cloud9am                float64
Cloud3pm                float64
Temp9am                 float64
Temp3pm                 float64
RainToday                object
RainTomorrow             object
dtype: object

In [10]:
df=df.sort_values('Date')

In [11]:
data = df['Sunshine'].values.reshape(-1,1)

In [12]:
scaler = MinMaxScaler()
data = scaler.fit_transform(data)

In [13]:
def create_sequences(data, sequence_length):
    sequences, targets = [], []
    for i in range(len(data) - sequence_length):
        seq = data[i:i+sequence_length]
        target = data[i+sequence_length]
        sequences.append(seq)
        targets.append(target)
    return np.array(sequences), np.array(targets)

In [14]:
sequence_length = 10
X, y = create_sequences(data, sequence_length)

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)


In [16]:
print(np.isnan(X_train).any(), np.isnan(y_train).any())
print(np.isnan(X_test).any(), np.isnan(y_test).any())        

True True
True True


In [17]:
X_train = np.nan_to_num(X_train, nan=np.nanmean(X_train))
y_train = np.nan_to_num(y_train, nan=np.nanmean(y_train))
X_test = np.nan_to_num(X_test, nan=np.nanmean(X_test))
y_test = np.nan_to_num(y_test, nan=np.nanmean(y_test))


In [18]:
print(np.isnan(X_train).any(), np.isnan(y_train).any())
print(np.isnan(X_test).any(), np.isnan(y_test).any())

False False
False False


In [19]:
model_rnn = Sequential()   
model_rnn.add(SimpleRNN(10, input_shape=(X_train.shape[1], 1)))  
model_rnn.add(Dense(1))     
model_rnn.compile(optimizer='adam', loss='mse')


In [20]:
p = model_rnn.fit(X_train, y_train, epochs=5, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [21]:
loss_rnn = model_rnn.evaluate(X_test, y_test)
print(f'Mean Squared Error on Test Set (RNN): {loss_rnn}')

Mean Squared Error on Test Set (RNN): 0.0222913958132267


In [22]:
predictions_rnn = model_rnn.predict(X_test)




In [23]:
threshold = 0.5

def binarize_rainfall(sunshine, threshold):
    return (sunshine > threshold).astype(int)

predictions_rnn = model_rnn.predict(X_test)

binary_predictions = binarize_rainfall(predictions_rnn, threshold)
binary_actual_values = binarize_rainfall(y_test, threshold)

accuracy = np.mean(binary_predictions == binary_actual_values)
print(f'Accuracy on Test Set: {accuracy:.5%}')

Accuracy on Test Set: 82.73290%


In [24]:
from sklearn.metrics import precision_score, matthews_corrcoef


In [28]:
threshold = 0.5
binary_predictions = (predictions_rnn > threshold).astype(int)
binary_actual_values = (y_test > threshold).astype(int)

# Calculate accuracy
accuracy = np.mean(binary_predictions == binary_actual_values)
print(f'Accuracy on Test Set: {accuracy:.5%}')

# Calculate precision
precision_rnn = precision_score(binary_actual_values, binary_predictions)
print(f'Precision on Test Set: {precision_rnn:.5f}')

# Calculate MCC
mcc_rnn = matthews_corrcoef(binary_actual_values, binary_predictions)
print(f'MCC on Test Set: {mcc_rnn:.5f}')

Accuracy on Test Set: 82.73290%
Precision on Test Set: 0.88071
MCC on Test Set: 0.03477
