<a href="https://colab.research.google.com/github/Cosmasrono/API/blob/main/skies.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, Flatten
from tensorflow.keras.metrics import MeanSquaredError

def _load_monthly_series(csv_path, value_column):
    """Read one monthly CSV and return it cleaned.

    The file's columns are relabelled to ``Year``, ``Month`` and
    *value_column* (assigning three labels requires the CSV to have exactly
    three columns), then rows containing missing values and fully
    duplicated rows are removed.
    """
    frame = pd.read_csv(csv_path)
    # Give both datasets identical key names so they can be merged later.
    frame.columns = ['Year', 'Month', value_column]
    return frame.dropna().drop_duplicates()


# Load and clean the two monthly series.
rainfall_data = _load_monthly_series('/content/rainfall.csv', 'Average_Rainfall_(MM)')
temperature_data = _load_monthly_series('/content/temp.csv', 'Average_Temperature_(Celsius)')

# Join the series on their shared keys; pandas' default inner join keeps only
# the (Year, Month) pairs that are present in both files.
data_combined = pd.merge(rainfall_data, temperature_data, on=['Year', 'Month'])

# Target: average temperature. Features: year, month and average rainfall.
y = data_combined['Average_Temperature_(Celsius)']
X = data_combined[['Year', 'Month', 'Average_Rainfall_(MM)']]

# Encode the 'Month' feature using one-hot encoding.
# The encoder is left at its default (sparse) output and densified explicitly:
# the `sparse=` keyword was renamed to `sparse_output=` in scikit-learn 1.2 and
# removed in 1.4, so hard-coding either spelling fails on some installed
# version, whereas `.toarray()` works on all of them.
encoder = OneHotEncoder(handle_unknown='ignore')
month_encoded = encoder.fit_transform(X[['Month']]).toarray()

# Combine encoded month with the numerical features ('Year' and
# 'Average_Rainfall_(MM)'). The indicator frame reuses X's index so that
# pd.concat pairs rows one-to-one instead of aligning on mismatched labels
# (which would silently introduce NaN rows).
month_columns = encoder.get_feature_names_out(['Month'])
X_combined = pd.concat(
    [
        X[['Year', 'Average_Rainfall_(MM)']],
        pd.DataFrame(month_encoded, columns=month_columns, index=X.index),
    ],
    axis=1,
)

# Choose the train/test rows BEFORE fitting the scaler. Splitting an index
# array with the same test_size / random_state should select the same rows
# the direct split of the data did, while giving us the positions needed to
# fit the scaler on training rows only.
train_idx, test_idx = train_test_split(
    np.arange(len(X_combined)), test_size=0.2, random_state=42
)

# Normalize the features. The min/max are learned from the training rows
# alone so no statistics of the held-out test rows leak into training; as a
# consequence, scaled test values may fall slightly outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(X_combined.iloc[train_idx])
X_scaled = scaler.transform(X_combined)

# Reshape the input data to fit the Conv1D input requirements: a 3D array of
# (samples, steps, channels). Here each feature column is treated as one
# step with a single channel.
X_reshaped = X_scaled.reshape(X_scaled.shape[0], X_scaled.shape[1], 1)

# Materialize the training and testing sets from the chosen row positions.
X_train, X_test = X_reshaped[train_idx], X_reshaped[test_idx]
y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

# Define the 1D-CNN regressor in one declarative list: two convolutional
# layers sliding over the feature axis, flattened into a small dense head
# that ends in a single linear output (the predicted temperature).
model = Sequential([
    Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(X_train.shape[1], 1)),
    Conv1D(filters=64, kernel_size=2, activation='relu'),
    Flatten(),
    Dense(50, activation='relu'),
    Dense(1),
])

# Optimize mean squared error with Adam; MSE is also tracked as a metric so
# that evaluate() returns it alongside the loss.
model.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=[MeanSquaredError()],
)

# Fit for 50 epochs, holding out 20% of the training data for validation.
history = model.fit(
    X_train,
    y_train,
    epochs=50,
    verbose=1,
    validation_split=0.2,
)

# Score the fitted model on the held-out test set and report the MSE.
loss, mse = model.evaluate(X_test, y_test, verbose=0)
print(f'CNN Model Evaluation - Mean Squared Error: {mse}')

# Generate predictions for the test samples (used by the plots below).
y_pred = model.predict(X_test)

# Draw both diagnostics side by side on a single 14x6 figure.
fig, (ax_pred, ax_loss) = plt.subplots(1, 2, figsize=(14, 6))

# Left panel: actual vs predicted temperatures for the test samples.
ax_pred.plot(y_test.values, label='Actual')
ax_pred.plot(y_pred, label='Predicted')
ax_pred.set_title('Actual vs Predicted Temperatures')
ax_pred.set_xlabel('Samples')
ax_pred.set_ylabel('Average Temperature (Celsius)')
ax_pred.legend()

# Right panel: training and validation loss recorded for each epoch.
ax_loss.plot(history.history['loss'], label='Training Loss')
ax_loss.plot(history.history['val_loss'], label='Validation Loss')
ax_loss.set_title('Training and Validation Loss')
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('Loss (Mean Squared Error)')
ax_loss.legend()

fig.tight_layout()
plt.show()
