<a href="https://colab.research.google.com/github/Cosmasrono/API/blob/main/skies.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import KNeighborsRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Load the rainfall and temperature datasets from the Colab working directory.
rainfall_data = pd.read_csv('/content/rainfall.csv')
temperature_data = pd.read_csv('/content/temp.csv')


def _print_overview(label, frame, *, lead=''):
    """Print a quick exploratory overview of *frame*.

    Emits, in order: the ``info()`` report, ``describe()`` statistics, the
    first rows, the column labels and the per-column null counts.

    Args:
        label: Human-readable dataset name used in the section headings.
        frame: The DataFrame to summarise.
        lead: Text placed before the first heading (use ``'\\n'`` to separate
            this overview from output printed earlier).
    """
    print(f"{lead}{label} Data Information:")
    # DataFrame.info() writes its report to stdout itself and returns None,
    # so wrapping it in print() would add a stray "None" line to the output.
    frame.info()
    print(f"\n{label} Data Description:")
    print(frame.describe())
    print(f"\n{label} Data Head:")
    print(frame.head())
    print(f"\n{label} Data Columns:")
    print(frame.columns)
    print(f"\n{label} Data Null Values:")
    print(frame.isnull().sum())


# Print basic information about the datasets
_print_overview("Rainfall", rainfall_data)
_print_overview("Temperature", temperature_data, lead="\n")

# Give both frames identical key column names ('Year', 'Month') so they can be
# joined. The rename is positional: each frame must have exactly three columns.
# NOTE(review): assumes the CSV column order is (year, month, value) - confirm.
rainfall_data.columns = ['Year', 'Month', 'Average_Rainfall_(MM)']
temperature_data.columns = ['Year', 'Month', 'Average_Temperature_(Celsius)']

# Clean each frame: discard rows containing missing values, then exact
# duplicate rows.
rainfall_data = rainfall_data.dropna().drop_duplicates()
temperature_data = temperature_data.dropna().drop_duplicates()

# Join on the shared keys; with the default join type only (Year, Month)
# pairs present in both frames are kept.
data_combined = rainfall_data.merge(temperature_data, on=['Year', 'Month'])

# Separate features (X) and target variable (y)
X = data_combined[['Year', 'Month', 'Average_Rainfall_(MM)']]
y = data_combined['Average_Temperature_(Celsius)']

# Encode the 'Month' feature using one-hot encoding.
# scikit-learn 1.2 renamed the `sparse` keyword to `sparse_output` and 1.4
# removed the old spelling, so try the current name first and fall back to the
# legacy one on older installations.
try:
    encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
except TypeError:
    encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')
month_encoded = encoder.fit_transform(X[['Month']])

# Combine encoded month with other features. The one-hot frame is given X's
# index explicitly so the column-wise concat aligns row-for-row even if X's
# index is not a plain 0..n-1 range.
month_columns = encoder.get_feature_names_out(['Month'])
X_combined = pd.concat(
    [
        X[['Year', 'Average_Rainfall_(MM)']],
        pd.DataFrame(month_encoded, columns=month_columns, index=X.index),
    ],
    axis=1,
)

# Decide the train/test membership first, by splitting row positions, so that
# the scaler below can be fitted on training rows only.
train_idx, test_idx = train_test_split(
    np.arange(len(X_combined)), test_size=0.2, random_state=42
)

# Normalize the features. The min/max statistics come from the training rows
# only; fitting on the full data set would leak test-set information into the
# preprocessing. Test rows may therefore scale slightly outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(X_combined.iloc[train_idx])
X_scaled = scaler.transform(X_combined)

# Reshape to the 3-D layout the LSTM expects (samples, timesteps, features).
# Each feature column is presented as one "timestep" carrying a single value.
X_reshaped = X_scaled.reshape(X_scaled.shape[0], X_scaled.shape[1], 1)

# Split data into training and testing sets using the positions chosen above
X_train, X_test = X_reshaped[train_idx], X_reshaped[test_idx]
y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

# KNN needs flat 2-D input, so collapse every axis after the sample axis.
X_train_knn = X_train.reshape(len(X_train), -1)
X_test_knn = X_test.reshape(len(X_test), -1)

# Fit a 5-nearest-neighbour regressor and predict the held-out targets.
knn_model = KNeighborsRegressor(n_neighbors=5)
knn_model.fit(X_train_knn, y_train)
knn_y_pred = knn_model.predict(X_test_knn)

# Regression error of the KNN predictions on the test split.
knn_mse = mean_squared_error(y_test, knn_y_pred)
knn_mae = mean_absolute_error(y_test, knn_y_pred)
print(f'KNN Model Evaluation - Mean Squared Error: {knn_mse}')
print(f'KNN Model Evaluation - Mean Absolute Error: {knn_mae}')

# Turn the regression task into a binary one for the classification report:
# class 1 means "at or above the mean training target", class 0 means below.
# The cut-off is a free choice; adjust it to suit the use case.
threshold = y_train.mean()
knn_y_pred_class = np.greater_equal(knn_y_pred, threshold).astype(int)
y_test_class = y_test.ge(threshold).astype(int)

# Precision / recall / F1 of the thresholded KNN predictions.
print("\nKNN Classification Report:")
knn_report = classification_report(y_test_class, knn_y_pred_class)
print(knn_report)

# Assemble the network layer by layer: a single 50-unit LSTM (ReLU activation)
# reading sequences of length X_train.shape[1] with one value per step,
# followed by a one-unit dense output for the regression target.
n_steps = X_train.shape[1]
lstm_model = Sequential()
lstm_model.add(LSTM(50, activation='relu', input_shape=(n_steps, 1)))
lstm_model.add(Dense(1))

# Optimise mean squared error with Adam.
lstm_model.compile(loss='mean_squared_error', optimizer='adam')

# Train for 50 epochs, holding out 20% of the training data for validation.
lstm_history = lstm_model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    epochs=50,
    verbose=1,
)

# No extra metrics were compiled, so evaluate() yields just the test loss (MSE).
lstm_mse = lstm_model.evaluate(X_test, y_test)
print(f'LSTM Model Evaluation - Mean Squared Error: {lstm_mse}')

# Predictions for the held-out samples.
lstm_y_pred = lstm_model.predict(X_test)

# Binarise the predictions with the same cut-off (`threshold`) that produced
# `y_test_class`, so the report is comparable with the KNN one.
lstm_y_pred_class = np.greater_equal(lstm_y_pred, threshold).astype(int)

# Precision / recall / F1 of the thresholded LSTM predictions.
print("\nLSTM Classification Report:")
lstm_report = classification_report(y_test_class, lstm_y_pred_class)
print(lstm_report)

# Visualize the results: one figure with three panels on a 2x2 grid
# (the fourth slot is intentionally left empty).
plt.figure(figsize=(14, 8))

# Panels 1 and 2: actual vs predicted temperatures, one panel per model.
for _panel, (_model_name, _predictions) in enumerate(
        (('KNN', knn_y_pred), ('LSTM', lstm_y_pred)), start=1):
    plt.subplot(2, 2, _panel)
    plt.plot(y_test.values, label='Actual')
    plt.plot(_predictions, label='Predicted')
    plt.title(f'{_model_name}: Actual vs Predicted Temperatures')
    plt.xlabel('Samples')
    plt.ylabel('Average Temperature (Celsius)')
    plt.legend()

# Panel 3: training & validation loss over epochs for LSTM
plt.subplot(2, 2, 3)
plt.plot(lstm_history.history['loss'], label='Training Loss')
plt.plot(lstm_history.history['val_loss'], label='Validation Loss')
plt.title('LSTM: Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss (Mean Squared Error)')
plt.legend()

# Space the panels so titles and axis labels do not overlap, then render this
# figure explicitly instead of relying on a later, unrelated plt.show() call.
plt.tight_layout()
plt.show()

# Restrict the exploratory plots to the numeric columns of the merged data.
numeric_cols = data_combined.select_dtypes(include=[np.number])
correlation_matrix = numeric_cols.corr()

# Annotated heatmap of the pairwise correlations between numeric columns.
plt.figure(figsize=(10, 6))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()

# Grid of pairwise plots over the same numeric columns.
sns.pairplot(numeric_cols)
plt.show()

# Final recap of the regression errors of both models, one metric per line.
print(
    f'KNN Model Evaluation - Mean Squared Error: {knn_mse}',
    f'KNN Model Evaluation - Mean Absolute Error: {knn_mae}',
    f'LSTM Model Evaluation - Mean Squared Error: {lstm_mse}',
    sep='\n',
)
