### Libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import pywt


In [None]:
# Read the raw weather table, then separate the rainfall column (kept as
# `target`) from the remaining columns (kept as `features`).
df = pd.read_csv('weatherdata.csv')
target = df['Rainfall(mm)']
features = df.drop(columns='Rainfall(mm)')

features.head(10)

## Preprocessing

### Convert the Month and Year

In [None]:

# Cyclical encoding of the month: map it to an angle on a 12-step circle and
# keep the sine/cosine pair (assuming months are numbered 1-12, December and
# January then land next to each other).
month_angle = 2 * np.pi * features['Month'] / 12
features['Month_sin'] = np.sin(month_angle)
features['Month_cos'] = np.cos(month_angle)

# Weight each year by exp((year - latest year) / 10): the latest year in the
# data gets a weight of 1.0 and earlier years get progressively smaller ones.
current_year = features['Year'].max()
years_from_latest = features['Year'] - current_year
features['Year_weighted'] = np.exp(years_from_latest / 10)

# The raw Month and Year columns are now replaced by the derived columns.
features.drop(columns=['Month', 'Year'], inplace=True)

features.head(20)


### Scaling

In [None]:
# Min-max scale the continuous weather measurements (temperatures, humidity,
# wind speed, cloud coverage and sunshine) with MinMaxScaler's default
# settings. The rainfall target is not part of `features`, so it is not
# scaled here.
#
# Each column is listed exactly once: repeating a name makes the scaler fit
# and write back the same column several times for no benefit and inflates
# the number of features the fitted scaler expects.
scaled_columns = [
    'Max Temp',
    'Min Temp',
    'Humidity (percent)',
    'Wind Speed (m/s)',
    'Cloud Coverage (Octs)',
    'Sunshine (Hours)',
]
minimax = MinMaxScaler()
features[scaled_columns] = minimax.fit_transform(features[scaled_columns])

features.head(10)

### Encoding

In [None]:
# One-hot encode the categorical 'Station' column: it is replaced by one
# integer (0/1) indicator column per station, each prefixed with 'Station'.
features = pd.get_dummies(
    features,
    columns=['Station'],
    prefix='Station',
    dtype=int,
)

# Preview the first 50 rows of the encoded frame
features.head(50)


### Discrete Wavelet Transform


In [None]:
# Multilevel discrete wavelet decomposition of the rainfall series using the
# Daubechies-4 ('db4') wavelet over 4 levels.
wavelet = 'db4'
level = 4
coefficients = pywt.wavedec(target, wavelet, level=level)

# The first entry of the coefficient list is the approximation
# (low-frequency) component; every remaining entry is a detail
# (high-frequency) component.
approximation, *details = coefficients

# print("Approximation coefficients (low-frequency):\n", approximation)
# for i, detail in enumerate(details, 1):
#     print(f"Detail coefficients at level {i}:\n", detail)

In [None]:
# Rebuild the rainfall signal from the approximation and detail coefficients.
# The `wavelet` chosen for the decomposition is reused (instead of repeating
# the literal name) so the two steps cannot drift apart.
reconstructed_data = pywt.waverec([approximation] + details, wavelet=wavelet)

# waverec can return one sample more than the decomposed signal when the
# original length is odd; trim so the target keeps exactly one value per row
# of the original data and stays aligned with `features`.
reconstructed_data = reconstructed_data[:len(target)]

# convert the reconstructed data to a dataframe
target = pd.DataFrame(reconstructed_data)
target.head(10)
