In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# Load the raw household readings; '?' and 'nan' entries are treated as
# missing values.
data_large = pd.read_csv('household_power_consumption.txt', sep=';',
                         low_memory=False, na_values=['nan', '?'])

# Build a single 'Datetime' column from the separate 'Date' and 'Time' text
# columns.  This replaces read_csv's dict-form `parse_dates` and
# `infer_datetime_format`, both deprecated in pandas 2.x, and pins the format
# so day and month can never be swapped by format guessing.
# NOTE(review): assumes day-first 'dd/mm/yyyy' dates and 'HH:MM:SS' times --
# confirm against the file; a mismatch raises instead of silently mis-parsing.
datetime_large = pd.to_datetime(
    data_large.pop('Date') + ' ' + data_large.pop('Time'),
    format='%d/%m/%Y %H:%M:%S')
# Insert at position 0 so the combined column leads the frame, as it did when
# read_csv produced it.
data_large.insert(0, 'Datetime', datetime_large)

# Measurement columns that must be numeric before modelling.
cols_to_numeric = [
    'Global_active_power',
    'Global_reactive_power',
    'Voltage',
    'Global_intensity',
    'Sub_metering_1',
    'Sub_metering_2',
    'Sub_metering_3',
]

# Convert all of them in one pass; anything that cannot be parsed becomes NaN.
data_large[cols_to_numeric] = data_large[cols_to_numeric].apply(
    pd.to_numeric, errors='coerce')

# Discard every row that still has a missing value in any column.
data_large.dropna(inplace=True)

# Calendar features derived from the timestamp ('Day' is the day of the week).
data_large['Hour'] = data_large['Datetime'].dt.hour
data_large['Day'] = data_large['Datetime'].dt.dayofweek
data_large['Month'] = data_large['Datetime'].dt.month

data_large.sort_values('Datetime', inplace=True)

# Target: Global_active_power 1440 minutes (24 hours) after each row.
# A positional shift(-1440) only means "24 hours later" when no row is
# missing, but rows with missing values were already dropped, so every gap
# would silently move the target closer or further in time.  Look the value up
# by timestamp instead; rows whose +24 h reading does not exist get NaN and
# are removed just below.
# NOTE(review): presumably the data is sampled once per minute, which is why
# the original used 1440 rows -- confirm.
power_by_time = (data_large.drop_duplicates('Datetime')
                 .set_index('Datetime')['Global_active_power'])
data_large['Target'] = power_by_time.reindex(
    data_large['Datetime'] + pd.Timedelta(minutes=1440)).to_numpy()
data_large.dropna(subset=['Target'], inplace=True)

# Model inputs: the electrical measurements (excluding the quantity being
# forecast) plus the calendar features.
features_large = data_large[['Global_reactive_power', 'Voltage', 'Global_intensity', 'Sub_metering_1',
                             'Sub_metering_2', 'Sub_metering_3', 'Hour', 'Day', 'Month']]
target_large = data_large['Target']

# Hold out the last 20 % of the rows; shuffle=False keeps the split in row
# order, so the test part follows the training part.
(X_train_large, X_test_large,
 y_train_large, y_test_large) = train_test_split(
    features_large, target_large, shuffle=False, test_size=0.2)

# Learn the standardisation statistics from the training rows only, then apply
# the same transform to both parts.
scaler_large = StandardScaler().fit(X_train_large)
X_train_large = scaler_large.transform(X_train_large)
X_test_large = scaler_large.transform(X_test_large)

# Feed-forward regressor: two ReLU hidden layers (64 and 32 units) followed by
# a single linear output unit.
model_large = Sequential()
model_large.add(Dense(64, activation='relu', input_shape=(X_train_large.shape[1],)))
model_large.add(Dense(32, activation='relu'))
model_large.add(Dense(1))

# Pretrain on the large dataset with Adam and an MSE loss; 10 % of the
# training data is set aside by Keras for validation.
model_large.compile(loss='mean_squared_error', optimizer='adam')
model_large.fit(
    x=X_train_large,
    y=y_train_large,
    batch_size=128,
    epochs=5,
    validation_split=0.1,
)

# Load the two tables of the smaller (target-domain) dataset.
weather_data = pd.read_csv("spanish-cities-energy-consumption/weather_features.csv")
energy_data = pd.read_csv("spanish-cities-energy-consumption/energy_dataset.csv")

# Remove energy columns that contain no data at all.
energy_data.dropna(axis=1, how='all', inplace=True)

# Give both tables the same treatment on their timestamp column: parse as UTC,
# strip the timezone so the values are naive, drop rows without a timestamp,
# and sort chronologically (merge_asof requires sorted keys).
for frame, time_col in ((weather_data, 'dt_iso'), (energy_data, 'time')):
    frame[time_col] = pd.to_datetime(frame[time_col], utc=True).dt.tz_localize(None)
    frame.dropna(subset=[time_col], inplace=True)
    frame.sort_values(time_col, inplace=True)

# Attach to every weather row the energy row whose timestamp is closest.
# merge_asof keeps all rows of the left (weather) table.
data_small = pd.merge_asof(weather_data,
                           energy_data,
                           left_on='dt_iso',
                           right_on='time',
                           direction='nearest')

data_small.sort_values('dt_iso', inplace=True)

# Target: 'total load actual' 24 hours after each row.
# A positional shift(-24) only means "24 hours later" when dt_iso has exactly
# one row per hour with no gaps and no repeated timestamps; nothing above
# guarantees that, because every weather row is kept by the merge.  Look the
# value up by timestamp instead; rows without a reading 24 hours later get NaN
# and are removed just below.
# NOTE(review): presumably the data is hourly, which is why the original
# shifted by 24 rows -- confirm.
load_by_time = (data_small.drop_duplicates('dt_iso')
                .set_index('dt_iso')['total load actual'])
data_small['Target'] = load_by_time.reindex(
    data_small['dt_iso'] + pd.Timedelta(hours=24)).to_numpy()
data_small.dropna(subset=['Target'], inplace=True)

# Calendar features, built the same way as for the large dataset.
data_small['Hour'] = data_small['dt_iso'].dt.hour
data_small['Day'] = data_small['dt_iso'].dt.dayofweek
data_small['Month'] = data_small['dt_iso'].dt.month

# The calendar columns are the only model inputs taken from the small dataset.
# .copy() gives an independent frame, so the columns added below are not
# written to a slice of data_small (which triggers SettingWithCopyWarning).
features_small = data_small[['Hour', 'Day', 'Month']].copy()

# Inputs the pretrained model expects but this dataset does not provide.
missing_features = [col for col in features_large.columns if col not in features_small.columns]

# Fill each absent input with its training mean rather than a raw 0.  The
# frame is standardised with scaler_large below, so a raw 0 would become
# -mean/std (possibly far outside anything seen during pretraining), whereas
# the training mean maps to a neutral 0.  scaler_large was fitted on the
# columns of features_large, so mean_ follows that column order.
train_means = dict(zip(features_large.columns, scaler_large.mean_))
for col in missing_features:
    features_small[col] = train_means[col]

# Same column order as during pretraining.
features_small = features_small[features_large.columns]
target_small = data_small['Target']

# Reuse the source-domain scaler so inputs are on the scale the model learned.
X_small = scaler_large.transform(features_small)
y_small = target_small.values

# Split in row order: the first 80 % of the rows are used for fine-tuning, the
# remaining 20 % for validation.
split_index = int(0.8 * len(X_small))
X_train_small, y_train_small = X_small[:split_index], y_small[:split_index]
X_val_small, y_val_small = X_small[split_index:], y_small[split_index:]

# Re-compile the pretrained network with a small learning rate and continue
# training it on the small dataset.
# NOTE(review): y_small is used in its raw units; if its magnitude differs a
# lot from the pretraining target, 5 epochs at this learning rate may not be
# enough to adapt -- confirm.
model_large.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=0.0001))
model_large.fit(
    x=X_train_small,
    y=y_train_small,
    batch_size=64,
    epochs=5,
    validation_data=(X_val_small, y_val_small),
)

# Predictions for the held-out validation rows.
y_pred_small = model_large.predict(X_val_small)

def calculate_metrics(y_true, y_pred):
    """Score regression predictions against the true values.

    Args:
        y_true: Observed target values.
        y_pred: Predicted values for the same samples.

    Returns:
        A 4-tuple ``(mse, rmse, mae, r2)``, where ``rmse`` is the square root
        of ``mse``.
    """
    squared_error = mean_squared_error(y_true, y_pred)
    return (
        squared_error,
        np.sqrt(squared_error),
        mean_absolute_error(y_true, y_pred),
        r2_score(y_true, y_pred),
    )

# Score the fine-tuned model on the validation part of the small dataset and
# report each metric on its own line.
metrics_transfer = calculate_metrics(y_val_small, y_pred_small)
transfer_mse, transfer_rmse, transfer_mae, transfer_r2 = metrics_transfer

print(f"Transfer Learning Model MSE: {transfer_mse}")
print(f"Transfer Learning Model RMSE: {transfer_rmse}")
print(f"Transfer Learning Model MAE: {transfer_mae}")
print(f"Transfer Learning Model R2: {transfer_r2}")
