In [1]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam

In [2]:
data = pd.read_csv('weatherHistory.csv')
data.head()

Unnamed: 0,Formatted Date,Summary,Precip Type,Temperature (C),Apparent Temperature (C),Humidity,Wind Speed (km/h),Wind Bearing (degrees),Visibility (km),Loud Cover,Pressure (millibars),Daily Summary
0,2006-04-01 00:00:00.000 +0200,Partly Cloudy,rain,9.472222,7.388889,0.89,14.1197,251.0,15.8263,0.0,1015.13,Partly cloudy throughout the day.
1,2006-04-01 01:00:00.000 +0200,Partly Cloudy,rain,9.355556,7.227778,0.86,14.2646,259.0,15.8263,0.0,1015.63,Partly cloudy throughout the day.
2,2006-04-01 02:00:00.000 +0200,Mostly Cloudy,rain,9.377778,9.377778,0.89,3.9284,204.0,14.9569,0.0,1015.94,Partly cloudy throughout the day.
3,2006-04-01 03:00:00.000 +0200,Partly Cloudy,rain,8.288889,5.944444,0.83,14.1036,269.0,15.8263,0.0,1016.41,Partly cloudy throughout the day.
4,2006-04-01 04:00:00.000 +0200,Mostly Cloudy,rain,8.755556,6.977778,0.83,11.0446,259.0,15.8263,0.0,1016.51,Partly cloudy throughout the day.


In [3]:
data.shape

(96453, 12)

In [4]:
data.drop(['Formatted Date'], axis=1)

Unnamed: 0,Summary,Precip Type,Temperature (C),Apparent Temperature (C),Humidity,Wind Speed (km/h),Wind Bearing (degrees),Visibility (km),Loud Cover,Pressure (millibars),Daily Summary
0,Partly Cloudy,rain,9.472222,7.388889,0.89,14.1197,251.0,15.8263,0.0,1015.13,Partly cloudy throughout the day.
1,Partly Cloudy,rain,9.355556,7.227778,0.86,14.2646,259.0,15.8263,0.0,1015.63,Partly cloudy throughout the day.
2,Mostly Cloudy,rain,9.377778,9.377778,0.89,3.9284,204.0,14.9569,0.0,1015.94,Partly cloudy throughout the day.
3,Partly Cloudy,rain,8.288889,5.944444,0.83,14.1036,269.0,15.8263,0.0,1016.41,Partly cloudy throughout the day.
4,Mostly Cloudy,rain,8.755556,6.977778,0.83,11.0446,259.0,15.8263,0.0,1016.51,Partly cloudy throughout the day.
...,...,...,...,...,...,...,...,...,...,...,...
96448,Partly Cloudy,rain,26.016667,26.016667,0.43,10.9963,31.0,16.1000,0.0,1014.36,Partly cloudy starting in the morning.
96449,Partly Cloudy,rain,24.583333,24.583333,0.48,10.0947,20.0,15.5526,0.0,1015.16,Partly cloudy starting in the morning.
96450,Partly Cloudy,rain,22.038889,22.038889,0.56,8.9838,30.0,16.1000,0.0,1015.66,Partly cloudy starting in the morning.
96451,Partly Cloudy,rain,21.522222,21.522222,0.60,10.5294,20.0,16.1000,0.0,1015.95,Partly cloudy starting in the morning.


In [5]:
data['Precip Type'].unique()

array(['rain', 'snow', nan], dtype=object)

In [7]:
rain_data = data[data['Precip Type'] == 'rain']
rain_data.head()

Unnamed: 0,Formatted Date,Summary,Precip Type,Temperature (C),Apparent Temperature (C),Humidity,Wind Speed (km/h),Wind Bearing (degrees),Visibility (km),Loud Cover,Pressure (millibars),Daily Summary
0,2006-04-01 00:00:00.000 +0200,Partly Cloudy,rain,9.472222,7.388889,0.89,14.1197,251.0,15.8263,0.0,1015.13,Partly cloudy throughout the day.
1,2006-04-01 01:00:00.000 +0200,Partly Cloudy,rain,9.355556,7.227778,0.86,14.2646,259.0,15.8263,0.0,1015.63,Partly cloudy throughout the day.
2,2006-04-01 02:00:00.000 +0200,Mostly Cloudy,rain,9.377778,9.377778,0.89,3.9284,204.0,14.9569,0.0,1015.94,Partly cloudy throughout the day.
3,2006-04-01 03:00:00.000 +0200,Partly Cloudy,rain,8.288889,5.944444,0.83,14.1036,269.0,15.8263,0.0,1016.41,Partly cloudy throughout the day.
4,2006-04-01 04:00:00.000 +0200,Mostly Cloudy,rain,8.755556,6.977778,0.83,11.0446,259.0,15.8263,0.0,1016.51,Partly cloudy throughout the day.


In [None]:
rain_data_cleaned = rain_data.drop(columns=['Formatted Date','Precip Type', 'Daily Summary', 'Pressure (millibars)', 'Visibility (km)', 'Wind Bearing (degrees)', 'Pressure (millibars)'])


In [10]:
df = rain_data_cleaned
df.head()

Unnamed: 0,Formatted Date,Summary,Temperature (C),Apparent Temperature (C),Humidity,Wind Speed (km/h),Loud Cover
0,2006-04-01 00:00:00.000 +0200,Partly Cloudy,9.472222,7.388889,0.89,14.1197,0.0
1,2006-04-01 01:00:00.000 +0200,Partly Cloudy,9.355556,7.227778,0.86,14.2646,0.0
2,2006-04-01 02:00:00.000 +0200,Mostly Cloudy,9.377778,9.377778,0.89,3.9284,0.0
3,2006-04-01 03:00:00.000 +0200,Partly Cloudy,8.288889,5.944444,0.83,14.1036,0.0
4,2006-04-01 04:00:00.000 +0200,Mostly Cloudy,8.755556,6.977778,0.83,11.0446,0.0


In [13]:
df.drop(['Formatted Date'],axis=1)

Unnamed: 0,Summary,Temperature (C),Apparent Temperature (C),Humidity,Wind Speed (km/h),Loud Cover
0,Partly Cloudy,9.472222,7.388889,0.89,14.1197,0.0
1,Partly Cloudy,9.355556,7.227778,0.86,14.2646,0.0
2,Mostly Cloudy,9.377778,9.377778,0.89,3.9284,0.0
3,Partly Cloudy,8.288889,5.944444,0.83,14.1036,0.0
4,Mostly Cloudy,8.755556,6.977778,0.83,11.0446,0.0
...,...,...,...,...,...,...
96448,Partly Cloudy,26.016667,26.016667,0.43,10.9963,0.0
96449,Partly Cloudy,24.583333,24.583333,0.48,10.0947,0.0
96450,Partly Cloudy,22.038889,22.038889,0.56,8.9838,0.0
96451,Partly Cloudy,21.522222,21.522222,0.60,10.5294,0.0


In [14]:
df['Summary'].unique()

array(['Partly Cloudy', 'Mostly Cloudy', 'Overcast', 'Foggy',
       'Breezy and Mostly Cloudy', 'Clear', 'Breezy and Partly Cloudy',
       'Breezy and Overcast', 'Humid and Mostly Cloudy',
       'Humid and Partly Cloudy', 'Windy and Foggy', 'Windy and Overcast',
       'Windy and Partly Cloudy', 'Breezy', 'Dry and Partly Cloudy',
       'Windy and Mostly Cloudy', 'Dangerously Windy and Partly Cloudy',
       'Dry', 'Windy', 'Humid and Overcast', 'Breezy and Foggy',
       'Light Rain', 'Drizzle', 'Windy and Dry', 'Dry and Mostly Cloudy',
       'Breezy and Dry', 'Rain'], dtype=object)

In [18]:
df['Summary'].value_counts()

Summary
Partly Cloudy                          29915
Mostly Cloudy                          26028
Overcast                               13916
Clear                                   9371
Foggy                                   4135
Breezy and Mostly Cloudy                 490
Breezy and Overcast                      472
Breezy and Partly Cloudy                 379
Dry and Partly Cloudy                     86
Windy and Partly Cloudy                   67
Light Rain                                63
Breezy                                    42
Windy and Overcast                        42
Humid and Mostly Cloudy                   40
Drizzle                                   39
Windy and Mostly Cloudy                   35
Dry                                       34
Humid and Partly Cloudy                   17
Dry and Mostly Cloudy                     14
Rain                                      10
Windy                                      8
Breezy and Foggy                           8
Hu

In [19]:
# df['Rain'] = df['Summary'].apply(lambda x: 1 if 'rain' in str(x).lower() else 0)
df['Rain'] = df['Summary'].apply(lambda x: 1 if any(weather in str(x).lower() for weather in ['rain', 'drizzle']) else 0)


In [20]:
df['Rain'].unique()


array([0, 1], dtype=int64)

In [21]:
df['Rain'].value_counts()

Rain
0    85112
1      112
Name: count, dtype: int64

In [22]:
df.drop(['Summary'],axis=1)

Unnamed: 0,Formatted Date,Temperature (C),Apparent Temperature (C),Humidity,Wind Speed (km/h),Loud Cover,Rain
0,2006-04-01 00:00:00.000 +0200,9.472222,7.388889,0.89,14.1197,0.0,0
1,2006-04-01 01:00:00.000 +0200,9.355556,7.227778,0.86,14.2646,0.0,0
2,2006-04-01 02:00:00.000 +0200,9.377778,9.377778,0.89,3.9284,0.0,0
3,2006-04-01 03:00:00.000 +0200,8.288889,5.944444,0.83,14.1036,0.0,0
4,2006-04-01 04:00:00.000 +0200,8.755556,6.977778,0.83,11.0446,0.0,0
...,...,...,...,...,...,...,...
96448,2016-09-09 19:00:00.000 +0200,26.016667,26.016667,0.43,10.9963,0.0,0
96449,2016-09-09 20:00:00.000 +0200,24.583333,24.583333,0.48,10.0947,0.0,0
96450,2016-09-09 21:00:00.000 +0200,22.038889,22.038889,0.56,8.9838,0.0,0
96451,2016-09-09 22:00:00.000 +0200,21.522222,21.522222,0.60,10.5294,0.0,0


In [25]:
df.drop(['Formatted Date', 'Summary'], axis=1, inplace=True)


In [28]:
df.drop(['Loud Cover'], axis=1, inplace=True)


In [29]:
df.head()

Unnamed: 0,Temperature (C),Apparent Temperature (C),Humidity,Wind Speed (km/h),Rain
0,9.472222,7.388889,0.89,14.1197,0
1,9.355556,7.227778,0.86,14.2646,0
2,9.377778,9.377778,0.89,3.9284,0
3,8.288889,5.944444,0.83,14.1036,0
4,8.755556,6.977778,0.83,11.0446,0


In [30]:
X = df.drop(columns=['Rain'])
y = df['Rain']


In [31]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [32]:
from imblearn.over_sampling import SMOTE

# Initialize SMOTE
smote = SMOTE(random_state=42)

# Apply SMOTE to the training data
X_train_balanced, y_train_balanced = smote.fit_resample(X_train, y_train)

In [33]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
import numpy as np

# Reshaping data to 3D for CNN (as expected by CNN layers)
X_train_cnn = np.expand_dims(X_train_balanced, axis=-1)
X_test_cnn = np.expand_dims(X_test, axis=-1)

# Build the CNN model
model = Sequential()

# 1D Convolution layer
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train_cnn.shape[1], 1)))
model.add(MaxPooling1D(pool_size=2))

# Flatten the output
model.add(Flatten())

# Fully connected layer
model.add(Dense(units=64, activation='relu'))
model.add(Dense(units=1, activation='sigmoid'))  # Binary classification (0 or 1)

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [34]:
history = model.fit(X_train_cnn, y_train_balanced, epochs=10, batch_size=32, validation_data=(X_test_cnn, y_test))


Epoch 1/10
[1m4256/4256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 3ms/step - accuracy: 0.8336 - loss: 0.3549 - val_accuracy: 0.9030 - val_loss: 0.1991
Epoch 2/10
[1m4256/4256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 4ms/step - accuracy: 0.9218 - loss: 0.2130 - val_accuracy: 0.8620 - val_loss: 0.3108
Epoch 3/10
[1m4256/4256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3ms/step - accuracy: 0.9321 - loss: 0.1879 - val_accuracy: 0.9224 - val_loss: 0.1621
Epoch 4/10
[1m4256/4256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 4ms/step - accuracy: 0.9361 - loss: 0.1746 - val_accuracy: 0.9089 - val_loss: 0.1877
Epoch 5/10
[1m4256/4256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 3ms/step - accuracy: 0.9437 - loss: 0.1593 - val_accuracy: 0.9273 - val_loss: 0.1602
Epoch 6/10
[1m4256/4256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 3ms/step - accuracy: 0.9470 - loss: 0.1523 - val_accuracy: 0.9278 - val_loss: 0.1652
Epoch 7/10

In [35]:
model.save('model.keras')

In [36]:
model.save('model.h5')




In [1]:
y_pred = model.predict(X_test_cnn)
y_pred_classes = (y_pred > 0.5).astype(int)

# Calculate and display classification metrics
from sklearn.metrics import classification_report, precision_recall_fscore_support
print("Classification Report:")
print(classification_report(y_test, y_pred_classes))

# Get specific metrics
precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred_classes, average='binary')
print("\nSummary Metrics:")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

NameError: name 'model' is not defined