<a href="https://colab.research.google.com/github/Haabey/hamoye/blob/main/Hamoye_TimeSeriesAnalysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Import the necessary python libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from prophet import Prophet

In [30]:
# Load the dataset
data = pd.read_csv("/content/household_power_consumption.txt", sep=",")

In [31]:
# Explore the dataset
data.head()

Unnamed: 0,Date;Time;Global_active_power;Global_reactive_power;Voltage;Global_intensity;Sub_metering_1;Sub_metering_2;Sub_metering_3
0,16/12/2006;17:24:00;4.216;0.418;234.840;18.400...
1,16/12/2006;17:25:00;5.360;0.436;233.630;23.000...
2,16/12/2006;17:26:00;5.374;0.498;233.290;23.000...
3,16/12/2006;17:27:00;5.388;0.502;233.740;23.000...
4,16/12/2006;17:28:00;3.666;0.528;235.680;15.800...



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.



In [32]:
# Explore the dataset
data.tail()

Unnamed: 0,Date;Time;Global_active_power;Global_reactive_power;Voltage;Global_intensity;Sub_metering_1;Sub_metering_2;Sub_metering_3
962606,11/9/2008;08:43:00;1.378;0.288;239.070;5.800;0...
962607,11/9/2008;08:44:00;1.414;0.316;239.070;6.000;0...
962608,11/9/2008;08:45:00;1.402;0.312;239.040;6.000;0...
962609,11/9/2008;08:46:00;1.380;0.274;239.470;5.800;0...
962610,11/9/200


In [33]:
data.shape

(962611, 1)

In [9]:
data.info

In [34]:
data.describe()

Unnamed: 0,Date;Time;Global_active_power;Global_reactive_power;Voltage;Global_intensity;Sub_metering_1;Sub_metering_2;Sub_metering_3
count,962611
unique,913887
top,18/3/2007;17:49:00;0.282;0.000;240.890;1.200;0...
freq,2


In [5]:
# Check for missing data
data.isnull().sum()

Date;Time;Global_active_power;Global_reactive_power;Voltage;Global_intensity;Sub_metering_1;Sub_metering_2;Sub_metering_3    0
dtype: int64

In [15]:
# Print the available columns in DataFrame
print(data.columns)

Index(['Date;Time;Global_active_power;Global_reactive_power;Voltage;Global_intensity;Sub_metering_1;Sub_metering_2;Sub_metering_3'], dtype='object')


In [None]:
# Forward-fill missing values
df = df.fillna(method='ffill')

In [None]:
# Resample to hourly and sum
df_hourly = df.resample('H', on='Time').sum()

In [None]:
# Plot hourly global active power
plt.figure(figsize=(15, 6))
plt.plot(df_hourly.index, df_hourly['Global_active_power'])
plt.xlabel('Time')
plt.ylabel('Global Active Power (Hourly)')
plt.title('Hourly Global Active Power Over Time')
plt.grid(True)
plt.show()

In [None]:
# Calculate the Pearson correlation coefficient
correlation = data['Global_active_power'].astype(float).corr(data['Voltage'].astype(float))

# Print the result to 3 decimal places
print(f"The Pearson correlation coefficient is: {correlation:.3f}")

In [None]:
# Calculate the Pearson correlation coefficient
correlation = data['Voltage'].corr(data['Global_intensity'])

# Print the result to 2 decimal places
print(f"Pearson correlation coefficient: {correlation:.2f}")

In [None]:
# Resample to daily and sum
df_daily = df.resample('D', on='Time').sum()

# Split the data into train and test sets
split_index = len(df_daily) - 300
train_df = df_daily.iloc[:split_index]
test_df = df_daily.iloc[split_index:]

In [None]:
# Rename columns for Prophet
train_df = train_df.reset_index().rename(columns={'Time': 'ds', 'Global_active_power': 'y'})
test_df = test_df.reset_index().rename(columns={'Time': 'ds', 'Global_active_power': 'y'})

In [None]:
# Create and fit the Prophet model
model = Prophet()
model.fit(train_df)

In [None]:
# Make predictions on the test set
forecast = model.predict(test_df)

In [None]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Calculates MAPE given y_true and y_pred"""
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# Calculate MAPE
mape = mean_absolute_percentage_error(test_df['y'], forecast['yhat'])

# Print MAPE with 2 decimal places
print(f"MAPE: {mape:.2f}%")

In [None]:
# Make predictions on the test set (if not already done)
forecast = model.predict(test_df)

# Calculate RMSE
from sklearn.metrics import mean_squared_error
rmse = mean_squared_error(test_df['y'], forecast['yhat'], squared=False)

# Print RMSE with 2 decimal places
print(f"RMSE: {rmse:.2f}")

In [None]:
# Plot the components of the forecast
model.plot_components(forecast)

In [None]:
# Rename columns for Prophet and regressors
df_daily = df.resample('D', on='Time').sum()
df_daily = df_daily.reset_index().rename(columns={'Time': 'ds',
                                                    'Global_active_power': 'y',
                                                    'Global_reactive_power': 'add1',
                                                    'Voltage': 'add2',
                                                    'Global_intensity': 'add3',
                                                    'Sub_metering_1': 'add4',
                                                    'Sub_metering_2': 'add5',
                                                    'Sub_metering_3': 'add6'})

# Split the data into train and test sets
split_index = len(df_daily) - 300
train_df = df_daily.iloc[:split_index]
test_df = df_daily.iloc[split_index:]

In [None]:
# Create the Prophet model
model = Prophet()

# Add regressors
model.add_regressor('add1')
model.add_regressor('add2')
model.add_regressor('add3')
model.add_regressor('add4')
model.add_regressor('add5')
model.add_regressor('add6')

# Fit the model
model.fit(train_df)

In [None]:
# Rename columns for Prophet and regressors
df_daily = df.resample('D', on='Time').sum()
df_daily = df_daily.reset_index().rename(columns={'Time': 'ds',
                                                    'Global_active_power': 'y',
                                                    'Global_reactive_power': 'add1',
                                                    'Voltage': 'add2',
                                                    'Global_intensity': 'add3',
                                                    'Sub_metering_1': 'add4',
                                                    'Sub_metering_2': 'add5',
                                                    'Sub_metering_3': 'add6'})

# Split the data into train and test sets
split_index = len(df_daily) - 300
train_df = df_daily.iloc[:split_index]
test_df = df_daily.iloc[split_index:]

In [None]:
# Create the Prophet model
model = Prophet()

# Add regressors
model.add_regressor('add1')
model.add_regressor('add2')
model.add_regressor('add3')
model.add_regressor('add4')
model.add_regressor('add5')
model.add_regressor('add6')

# Fit the model
model.fit(train_df)

In [None]:
# Make predictions on the test set
forecast = model.predict(test_df)

In [None]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Calculates MAPE given y_true and y_pred"""
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

# Calculate MAPE
mape = mean_absolute_percentage_error(test_df['y'], forecast['yhat'])

# Print MAPE with 2 decimal places
print(f"MAPE: {mape:.2f}%")

In [None]:
# Calculate RMSE
rmse = mean_squared_error(test_df['y'], forecast['yhat'], squared=False)

# Print RMSE with 2 decimal places
print(f"RMSE: {rmse:.2f}")

In [None]:
# Plot the components of the forecast
model.plot_components(forecast)