In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import accuracy_score

In [None]:
# Read the weatherAUS CSV file into a pandas DataFrame named `data`
data = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/weatherAUS.csv/weatherAUS.csv',
)


In [None]:
# Names of the text-valued wind-direction columns that get one-hot encoded
categorical_columns = [
    'WindGustDir',
    'WindDir9am',
    'WindDir3pm',
]

In [None]:
# Report, for every column, how many entries are missing
print(data.isna().sum())

Date                 0
Location             0
MinTemp           1485
MaxTemp           1261
Rainfall          3261
Evaporation      62790
Sunshine         69835
WindGustDir      10326
WindGustSpeed    10263
WindDir9am       10566
WindDir3pm        4228
WindSpeed9am      1767
WindSpeed3pm      3062
Humidity9am       2654
Humidity3pm       4507
Pressure9am      15065
Pressure3pm      15028
Cloud9am         55888
Cloud3pm         59358
Temp9am           1767
Temp3pm           3609
RainToday         3261
RainTomorrow      3267
dtype: int64


In [None]:
# One-hot encode the columns listed in `categorical_columns`; each listed column
# is replaced by indicator columns named "<column>_<value>"
data = pd.get_dummies(
    data=data,
    columns=categorical_columns,
)


In [None]:
# Show the column labels as they stand after one-hot encoding
column_index = data.columns
print(column_index)


Index(['Date', 'Location', 'MinTemp', 'MaxTemp', 'Rainfall', 'Evaporation',
       'Sunshine', 'WindGustSpeed', 'WindSpeed9am', 'WindSpeed3pm',
       'Humidity9am', 'Humidity3pm', 'Pressure9am', 'Pressure3pm', 'Cloud9am',
       'Cloud3pm', 'Temp9am', 'Temp3pm', 'RainToday', 'RainTomorrow',
       'WindGustDir_E', 'WindGustDir_ENE', 'WindGustDir_ESE', 'WindGustDir_N',
       'WindGustDir_NE', 'WindGustDir_NNE', 'WindGustDir_NNW',
       'WindGustDir_NW', 'WindGustDir_S', 'WindGustDir_SE', 'WindGustDir_SSE',
       'WindGustDir_SSW', 'WindGustDir_SW', 'WindGustDir_W', 'WindGustDir_WNW',
       'WindGustDir_WSW', 'WindDir9am_E', 'WindDir9am_ENE', 'WindDir9am_ESE',
       'WindDir9am_N', 'WindDir9am_NE', 'WindDir9am_NNE', 'WindDir9am_NNW',
       'WindDir9am_NW', 'WindDir9am_S', 'WindDir9am_SE', 'WindDir9am_SSE',
       'WindDir9am_SSW', 'WindDir9am_SW', 'WindDir9am_W', 'WindDir9am_WNW',
       'WindDir9am_WSW', 'WindDir3pm_E', 'WindDir3pm_ENE', 'WindDir3pm_ESE',
       'WindDir3pm_N', '

In [None]:
# Stop early with an explicit message when the target column is absent
target_present = 'RainTomorrow' in data.columns
if not target_present:
    raise KeyError("Column 'RainTomorrow' not found in the dataset.")



In [None]:
# Take the label column as the target, and every remaining column as features
y = data['RainTomorrow']
X = data.drop(columns=['RainTomorrow'])



In [None]:
# Replace the pandas containers with their underlying array values
X, y = X.values, y.values


In [None]:
# Hold out 20% of the rows as a test set; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer

# Build scaled / one-hot-encoded train and test feature matrices from `data`.
# All work is done on a copy, so `data` itself is left untouched and re-running
# this cell produces the same result every time.

# Date columns to turn into a numeric "days since the earliest date" feature
date_columns = ['Date']

# Rows without a target label cannot be used for training or evaluation
labelled = data.dropna(subset=['RainTomorrow']).copy()

for col in date_columns:
    if col in labelled.columns:
        as_datetime = pd.to_datetime(labelled[col])  # Convert to datetime objects
        labelled[col] = (as_datetime - as_datetime.min()).dt.days  # Days since the earliest date

# Separate features and target
X = labelled.drop('RainTomorrow', axis=1)
y = labelled['RainTomorrow']

# Identify categorical and numerical columns
categorical_cols = X.select_dtypes(include=['object']).columns
numerical_cols = X.select_dtypes(exclude=['object']).columns

# Create transformers for numerical and categorical features
numerical_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(handle_unknown='ignore')  # Categories unseen during fit encode as all zeros

# Combine transformers using ColumnTransformer.
# sparse_threshold=0 forces a dense array, because the following cells call
# np.nan_to_num and .astype on X_train / X_test.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols)
    ],
    sparse_threshold=0)

# Split BEFORE fitting the preprocessor: scaling statistics and the category
# vocabulary must be learned from the training rows only, otherwise information
# from the test set leaks into the features.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

# Features are now preprocessed; y_train / y_test still hold the raw
# RainTomorrow labels and are not encoded in this cell.

In [None]:
# Handle NaN and +/-Inf values.
# Every non-finite entry is replaced by the mean of the finite TRAINING values
# of the same column. A single mean over the whole matrix would mix unrelated
# features, filling the test set from its own statistics would leak test
# information, and a mean computed over data that contains Inf is itself
# non-finite.
# Assumes X_train / X_test are dense 2-D numeric arrays with identical columns
# -- TODO confirm against the cell that creates them.
X_train = np.asarray(X_train, dtype=float)
X_test = np.asarray(X_test, dtype=float)

train_is_finite = np.isfinite(X_train)
finite_counts = train_is_finite.sum(axis=0)
finite_sums = np.where(train_is_finite, X_train, 0.0).sum(axis=0)

# Per-column mean of the finite training values; a column with no finite
# training value falls back to 0.0 instead of dividing by zero.
column_fill = np.divide(
    finite_sums,
    finite_counts,
    out=np.zeros_like(finite_sums),
    where=finite_counts > 0,
)

# column_fill has one entry per column and broadcasts across the rows
X_train = np.where(train_is_finite, X_train, column_fill)
X_test = np.where(np.isfinite(X_test), X_test, column_fill)



In [None]:
# Feed-forward binary classifier: two ReLU layers (64 and 32 units) followed by
# a single sigmoid unit. The input width is taken from the training matrix.
model = Sequential([
    Dense(64, input_dim=X_train.shape[1], activation='relu'),  # First layer, declares the input size
    Dense(32, activation='relu'),                              # Hidden layer
    Dense(1, activation='sigmoid'),                            # Single-unit output for binary classification
])


In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Cast both feature matrices to float32 and replace NaN / +Inf / -Inf with 0.0
X_train, X_test = (
    np.nan_to_num(split.astype(np.float32), nan=0.0, posinf=0.0, neginf=0.0)
    for split in (X_train, X_test)
)

In [None]:
# Coerce both splits to float32, mapping NaN and +/-Inf entries to 0.0
X_train, X_test = map(
    lambda features: np.nan_to_num(features.astype(np.float32), nan=0.0, posinf=0.0, neginf=0.0),
    (X_train, X_test),
)

# Print the resulting dtype of each split, one per line
print(X_train.dtype, X_test.dtype, sep='\n')

float32
float32


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import accuracy_score

# End-to-end pipeline: load -> clean -> encode -> split -> impute/scale ->
# train -> evaluate -> save. It reloads the CSV, so it does not depend on the
# state left behind by earlier cells.

# Load the dataset
data = pd.read_csv('/content/drive/MyDrive/weatherAUS.csv/weatherAUS.csv')

# A row without a target label cannot be used for training or scoring, and a
# missing label must not reach LabelEncoder as if it were a class.
data = data.dropna(subset=['RainTomorrow'])

# Convert the date column to a numeric feature: days since the earliest date
data['Date'] = pd.to_datetime(data['Date'])
reference_date = data['Date'].min()
data['DaysSince'] = (data['Date'] - reference_date).dt.days
data = data.drop('Date', axis=1)  # Remove original date column

# Separate features and target
X = data.drop('RainTomorrow', axis=1)
y = data['RainTomorrow']

# Define categorical columns: the wind-direction columns plus every other
# non-numeric feature. Leaving a text column such as 'Location' unencoded is
# what made the scaler fail with "could not convert string to float".
categorical_columns = ['WindGustDir', 'WindDir9am', 'WindDir3pm']
categorical_columns += [
    col for col in X.select_dtypes(exclude=['number', 'bool']).columns
    if col not in categorical_columns
]

# Handle missing values in categorical columns before one-hot encoding
X[categorical_columns] = X[categorical_columns].fillna('Unknown')  # Replace missing values with 'Unknown'

# Encode categorical variables as float indicator columns
X = pd.get_dummies(X, columns=categorical_columns, dtype='float32')

# Convert target variable to integer class labels
le = LabelEncoder()
y = le.fit_transform(y).astype('int32')  # Ensure y is integer type

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Impute missing numeric values with the TRAINING medians. StandardScaler
# passes NaN through, and NaN inputs would make the network's loss NaN.
# A column with no observed training value falls back to 0.0.
fill_values = X_train.median().fillna(0.0)
X_train = X_train.fillna(fill_values)
X_test = X_test.fillna(fill_values)

# Standardize the features using statistics learned from the training set only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Ensure both splits use the same dtype
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

# Define the model
model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))  # Input layer
model.add(Dense(32, activation='relu'))  # Hidden layer
model.add(Dense(1, activation='sigmoid'))  # Output layer for binary classification

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

# Evaluate the model on the test data; ravel() turns the (n, 1) prediction
# column into a 1-D vector that lines up with y_test
y_pred = (model.predict(X_test) > 0.5).astype("int32").ravel()

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Save the model
model.save('australia_rain_model.h5')



ValueError: could not convert string to float: 'Albany'