In [1]:
import pandas as pd
import plotly.graph_objects as go
import pickle
import os
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor

# Constants for environmental impact calculations
CARBON_EMISSION_PER_KWH = 0.5  # in kg CO2
# Fraction of the current emissions removed in calculate_environmental_impact
CARBON_EMISSION_REDUCTION_FACTOR = 0.1

# Load or initialize historical data storage
historical_data_path = 'historical_energy_consumption.csv'
if os.path.exists(historical_data_path):
    # User IDs are typed in as text and matched with ==. Left to type inference,
    # an all-numeric ID column is read back as integers (and "007" becomes 7),
    # so a returning user would never match their stored rows.
    historical_data = pd.read_csv(historical_data_path, dtype={'User_ID': str})
else:
    # First run: start with an empty table that has the stored-prediction columns
    historical_data = pd.DataFrame(columns=['User_ID', 'Country', 'Month', 'Year', 'Date', 'Appliances', 'Predicted_Energy_kWh'])

# Load the datasets (all three files are read from the current working directory)
energy = pd.read_csv('Household_energy_data.csv')
govt_schemes = pd.read_csv('govt schemes.csv')
energy_providers = pd.read_csv('energy_providers.csv')

# Sample and preprocess energy data
# Fixed random_state keeps the 10000-row sample reproducible between runs.
# NOTE(review): sample() without replace=True presumably fails when the file
# holds fewer than 10000 rows - confirm the dataset size.
energy = energy.sample(10000, random_state=15)
# Parse, then re-format, so Year is a 4-digit string and Month a 3-letter abbreviation
energy['Year'] = pd.to_datetime(energy['Year'], format='%Y').dt.strftime('%Y')
energy['Month'] = pd.to_datetime(energy['Month'], format='%b').dt.strftime('%b')
energy['Household_ID'] = energy['Household_ID'].astype(str)
# Total_Consumption_kWh is removed before the features are built
energy_con = energy.drop(columns=['Total_Consumption_kWh'])

# One-hot encode the categorical columns; Household_ID is dropped so it is not a feature
data = pd.get_dummies(energy_con, columns=['Country', 'Appliance', 'Month', 'Year'])
data = data.drop(columns=['Household_ID'])

# Define features and target: predict Energy_Consumption_kWh from every remaining column
Y = data['Energy_Consumption_kWh']
X = data.drop(columns=['Energy_Consumption_kWh'])

# Split data into training and testing sets (80% / 20%, fixed seed)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Scale features; the scaler is fitted on the training split only and then
# applied unchanged to the test split
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the model
model = GradientBoostingRegressor(n_estimators=100, random_state=42)
model.fit(X_train_scaled, Y_train)

# Save the model, scaler, and column names.
# predict_energy_consumption reloads these three files; the column list lets it
# rebuild a single-row input with the same columns, in the same order, as X_train.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)
with open('columns.pkl', 'wb') as columns_file:
    pickle.dump(X_train.columns.tolist(), columns_file)

def calculate_cost_savings(predicted_energy_kwh, current_energy_cost_per_kwh, alternate_energy_cost_per_kwh):
    """Price the predicted consumption under the current and the alternate tariff.

    Args:
        predicted_energy_kwh: Predicted energy use in kWh.
        current_energy_cost_per_kwh: Price per kWh paid today.
        alternate_energy_cost_per_kwh: Price per kWh of the alternative.

    Returns:
        A tuple ``(current_cost, alternate_cost, savings)`` where ``savings``
        is the current cost minus the alternate cost (negative when the
        alternative is more expensive).
    """
    tariffs = (current_energy_cost_per_kwh, alternate_energy_cost_per_kwh)
    cost_now, cost_alternative = (predicted_energy_kwh * rate for rate in tariffs)
    return cost_now, cost_alternative, cost_now - cost_alternative

def calculate_environmental_impact(predicted_energy_kwh):
    """Estimate emissions for the predicted consumption, before and after reduction.

    Args:
        predicted_energy_kwh: Predicted energy use in kWh.

    Returns:
        A tuple ``(current_emissions, reduced_emissions)``: the consumption
        multiplied by ``CARBON_EMISSION_PER_KWH``, and that figure scaled down
        by ``CARBON_EMISSION_REDUCTION_FACTOR``.
    """
    remaining_share = 1 - CARBON_EMISSION_REDUCTION_FACTOR
    baseline_emissions = predicted_energy_kwh * CARBON_EMISSION_PER_KWH
    return baseline_emissions, baseline_emissions * remaining_share

def view_historical_data(historical_data, current_prediction, user_id, current_date):
    """Plot a user's stored predictions together with the current prediction.

    Stored predictions are drawn as blue bars by date; the current prediction
    is drawn as a dashed red vertical line from 0 up to its value.

    Args:
        historical_data: DataFrame with at least the ``User_ID``, ``Date`` and
            ``Predicted_Energy_kWh`` columns. It is not modified.
        current_prediction: Current predicted consumption in kWh.
        user_id: ID whose rows are plotted.
        current_date: x position of the current prediction.

    Returns:
        None. A message is printed when the user has no stored rows, and any
        exception raised while building or showing the figure is reported on
        stdout instead of being propagated.
    """
    try:
        # Compare as text: IDs entered at the prompt are strings, while IDs read
        # back from CSV may have been parsed as numbers.
        id_matches = historical_data['User_ID'].astype(str) == str(user_id)
        # Work on a copy so the Date conversion below never writes into the
        # caller's frame (and does not trigger chained-assignment warnings).
        user_history = historical_data[id_matches].copy()
        if user_history.empty:
            print("No historical data found for the user.")
            return

        # Convert Date to datetime for plotting; unparseable values become NaT
        # so that they can be filtered out instead of aborting the whole plot.
        user_history['Date'] = pd.to_datetime(
            user_history['Date'], format='%Y-%m-%d', errors='coerce'
        )

        # Filter out rows with invalid dates
        user_history = user_history.dropna(subset=['Date'])

        hover_labels = [
            f"{day.strftime('%Y-%m-%d')}<br>{kwh} kWh"
            for day, kwh in zip(user_history['Date'], user_history['Predicted_Energy_kWh'])
        ]

        # Create Plotly figure
        fig = go.Figure()

        # Add historical data as bar plot
        fig.add_trace(go.Bar(
            x=user_history['Date'],
            y=user_history['Predicted_Energy_kWh'],
            marker_color='blue',
            name='Historical Data',
            hovertext=hover_labels
        ))

        # Add current prediction as a vertical line at the correct date
        fig.add_trace(go.Scatter(
            x=[current_date, current_date],
            y=[0, current_prediction],
            mode='lines',
            line=dict(color='red', dash='dash'),
            name='Current Prediction',
            hovertext='Current Prediction'
        ))

        # Update layout
        fig.update_layout(
            title=f'Historical vs Current Energy Consumption for User ID: {user_id}',
            xaxis_title='Date',
            yaxis_title='Energy Consumption (kWh)',
            xaxis=dict(tickformat='%Y-%m-%d'),
            xaxis_tickangle=-45,
            showlegend=True,
            legend=dict(x=0.02, y=0.98)
        )

        # Show the plot
        fig.show()

    except Exception as e:
        # Plotting is optional; report the problem and let the session continue.
        print(f"Error occurred during plotting: {e}")



def _prompt_choice(prompt, valid_values, error_message):
    """Ask until the reply names one of valid_values (ignoring case); return that value."""
    lookup = {str(value).strip().lower(): value for value in valid_values}
    while True:
        reply = input(prompt).strip().lower()
        if reply in lookup:
            return lookup[reply]
        print(error_message)


def _prompt_positive_number(prompt, convert, error_message):
    """Ask until convert() parses the reply into a finite number greater than zero."""
    while True:
        try:
            value = convert(input(prompt))
        except ValueError:
            print(error_message)
            continue
        # The upper bound also rejects "nan" and "inf", which float() accepts.
        if 0 < value < float('inf'):
            return value
        print(error_message)


def predict_energy_consumption(user_id):
    """Interactively collect inputs, predict consumption and store the result.

    Prompts for country, month, year, date, appliances and the current energy
    price, predicts the consumption of each appliance with the pickled model,
    appends the total to the history CSV and prints the government schemes
    listed for the chosen country.

    Args:
        user_id: ID under which the prediction is stored.

    Returns:
        A 7-tuple ``(historical_data, total_prediction,
        max_energy_consumption, max_energy_appliance,
        current_energy_cost_per_kwh, date, valid_input)``. When the user is
        already registered under another country, or any exception occurs,
        the five middle values are ``None`` and ``valid_input`` is ``False``.
    """
    global historical_data

    try:
        # Match case-insensitively against the dataset's own spelling. Using
        # str.capitalize() here would turn "united states" into "United states"
        # and make every multi-word name impossible to enter.
        country = _prompt_choice(
            "Enter country: ",
            energy['Country'].dropna().unique(),
            "Invalid country. Please enter a valid country from the list.",
        )

        # Compare IDs as text: stored IDs may have been parsed as numbers when
        # the history CSV was read back.
        same_user = historical_data['User_ID'].astype(str) == str(user_id)
        user_country_history = historical_data[same_user & (historical_data['Country'] != country)]
        if not user_country_history.empty:
            print(f"User already registered with different country data: {user_country_history['Country'].unique()}")
            return historical_data, None, None, None, None, None, False

        month = _prompt_choice(
            "Enter month (e.g., Jan, Feb, etc.): ",
            ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
             'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
            "Invalid month. Please enter a valid month abbreviation (e.g., Jan).",
        )

        while True:
            year = input("Enter year (e.g., 2020): ").strip()
            # isascii() keeps out digit-like characters that isdigit() accepts
            # but int() rejects (e.g. superscripts).
            if year.isascii() and year.isdigit() and 2010 <= int(year) <= 2100:
                # Normalise e.g. "02020" so it can match the one-hot Year columns
                year = str(int(year))
                break
            print("Invalid year. Please enter a valid year (between 2010 and 2100).")

        while True:
            date = input("Enter the specific date (YYYY-MM-DD): ").strip()
            try:
                datetime.strptime(date, '%Y-%m-%d')
                break
            except ValueError:
                print("Invalid date format. Please enter a valid date (YYYY-MM-DD).")

        num_appliances = _prompt_positive_number(
            "Enter number of appliances: ",
            int,
            "Invalid input. Please enter a valid number.",
        )

        known_appliances = energy['Appliance'].dropna().unique()
        appliances = [
            _prompt_choice(
                f"Enter appliance {position}: ",
                known_appliances,
                "Invalid appliance. Please enter a valid appliance from the list.",
            )
            for position in range(1, num_appliances + 1)
        ]

        current_energy_cost_per_kwh = _prompt_positive_number(
            "Enter current energy cost per kWh (e.g., 0.12): ",
            float,
            "Invalid input. Please enter a valid energy cost per kWh.",
        )

        # NOTE(security): pickle.load executes code embedded in the file. These
        # files are expected to be the ones written by this script; never point
        # these paths at files from an untrusted source.
        with open('scaler.pkl', 'rb') as scaler_file:
            scaler = pickle.load(scaler_file)

        with open('columns.pkl', 'rb') as columns_file:
            columns = pickle.load(columns_file)

        with open('model.pkl', 'rb') as model_file:
            model = pickle.load(model_file)

        total_prediction = 0
        # Start from "nothing seen yet" rather than 0 so that an appliance is
        # always reported, even if every prediction is zero or negative.
        max_energy_consumption = None
        max_energy_appliance = ""

        for appliance in appliances:
            input_data = pd.DataFrame({
                'Country': [country],
                'Month': [month],
                'Year': [year],
                'Appliance': [appliance]
            })
            input_data = pd.get_dummies(input_data, columns=['Country', 'Appliance', 'Month', 'Year'])
            # Align with the training columns; categories not present in this
            # single row (and any other training column) are filled with 0.
            input_data = input_data.reindex(columns=columns, fill_value=0)
            predicted_kwh = model.predict(scaler.transform(input_data))[0]
            total_prediction += predicted_kwh

            if max_energy_consumption is None or predicted_kwh > max_energy_consumption:
                max_energy_consumption = predicted_kwh
                max_energy_appliance = appliance

        # Append to historical data
        new_data = pd.DataFrame({
            'User_ID': [user_id],
            'Country': [country],
            'Month': [month],
            'Year': [year],
            'Date': [date],
            'Appliances': [', '.join(appliances)],
            'Predicted_Energy_kWh': [total_prediction]
        })
        if historical_data.empty:
            # Nothing stored yet: the new row is the whole history. This avoids
            # concatenating with an empty frame.
            historical_data = new_data
        else:
            historical_data = pd.concat([historical_data, new_data], ignore_index=True)
        historical_data.to_csv(historical_data_path, index=False)

        # Display government schemes for the selected country
        applicable_schemes = govt_schemes[govt_schemes['Country'] == country]
        print(f"Applicable government schemes for {country}:")
        for scheme in applicable_schemes['Scheme/Subsidy']:
            print(f"- {scheme}")

        return historical_data, total_prediction, max_energy_consumption, max_energy_appliance, current_energy_cost_per_kwh, date, True

    except Exception as e:
        # Boundary of the interactive flow: report and signal failure to main()
        print(f"An error occurred: {e}")
        return historical_data, None, None, None, None, None, False


def _ask_yes_no(prompt):
    """Repeat prompt until the reply is 'yes' or 'no'; return True for 'yes'."""
    while True:
        answer = input(prompt).strip().lower()
        if answer in ('yes', 'no'):
            return answer == 'yes'
        print("Invalid input. Please enter 'yes' or 'no'.")


def _print_visa_benefits(alternate_cost, savings):
    """Print the Visa discount, cashback, rewards and resulting final cost."""
    visa_benefits = {
        'discount': 0.05,
        'cashback': 0.02,
        'reward_points': 100,
        'exclusive_offers': "10% off on next billing cycle",
    }
    # The discount is a share of the savings. When the alternative is more
    # expensive the savings are negative, and an unclamped "discount" would
    # increase the final cost.
    discount_amount = max(savings, 0) * visa_benefits['discount']
    cashback_amount = alternate_cost * visa_benefits['cashback']
    final_cost = alternate_cost - discount_amount - cashback_amount
    print("\nVisa Benefits Applied:")
    print(f"Discount Amount: ${discount_amount:.2f}")
    print(f"Cashback Amount: ${cashback_amount:.2f}")
    print(f"Reward Points: {visa_benefits['reward_points']}")
    print(f"Exclusive Offers: {visa_benefits['exclusive_offers']}")
    print(f"Final Energy Cost with Visa: ${final_cost:.2f}")


def _explore_alternative_sources(user_appliances, total_prediction, current_energy_cost_per_kwh):
    """Offer each provider that covers one of the user's appliances until one is selected."""
    wanted = {name.strip().lower() for name in user_appliances}

    for _, row in energy_providers.iterrows():
        supported = row['Appliance']
        if not isinstance(supported, str):
            # A blank cell is read as NaN, which has no .split()
            continue
        # Tolerate "Fan,Heater" as well as "Fan, Heater", and differing case
        applicable_appliances = [
            name.strip() for name in supported.split(',')
            if name.strip().lower() in wanted
        ]
        if not applicable_appliances:
            continue

        provider = row['Provider']
        cost_per_kwh = row['Cost_per_kWh']
        print(f"\nProvider: {provider}")
        print(f"Energy Source: {row['Energy_Source']}")
        print(f"Cost per kWh: ${cost_per_kwh}")
        print(f"Applicable Appliances: {', '.join(applicable_appliances)}")

        if not _ask_yes_no("Do you want to select this alternative energy source? (yes/no): "):
            print("Proceeding without selecting this alternative energy source.")
            continue

        current_cost, alternate_cost, savings = calculate_cost_savings(
            total_prediction, current_energy_cost_per_kwh, cost_per_kwh
        )
        current_emissions, reduced_emissions = calculate_environmental_impact(total_prediction)

        print(f"\nCost Savings with {provider}:")
        print(f"Current Energy Cost: ${current_cost:.2f}")
        print(f"Alternate Energy Cost: ${alternate_cost:.2f}")
        print(f"Total Savings: ${savings:.2f}")

        print(f"\nEnvironmental Impact with {provider}:")
        print(f"Current Carbon Emissions: {current_emissions:.2f} kg CO2")
        print(f"Reduced Carbon Emissions: {reduced_emissions:.2f} kg CO2")

        if _ask_yes_no("Do you want to use Visa for payment to avail additional benefits? (yes/no): "):
            print("Applying Visa benefits...")
            _print_visa_benefits(alternate_cost, savings)
        else:
            print("Proceeding without Visa benefits.")
        # Only one provider can be selected; stop offering further ones
        return

    print("No alternative energy source selected.")


def _run_prediction_session():
    """Run one prediction dialogue; return True when the user wants another one."""
    print("Welcome to the Energy Consumption Prediction Project!\n")

    user_id = input("Enter your User ID: ").strip()
    if not user_id:
        print("User ID cannot be empty.")
        return False

    (history, total_prediction, max_energy_consumption, max_energy_appliance,
     current_energy_cost_per_kwh, date, valid_input) = predict_energy_consumption(user_id)
    if not valid_input:
        print("Prediction aborted due to invalid input or other issues.")
        return False

    # On success predict_energy_consumption has just appended this user's row,
    # so the last row of the history is the current prediction.
    latest = history.iloc[-1]

    # Print prediction details
    print(f"\nPrediction for User ID: {user_id}")
    print(f"Country: {latest['Country']}")
    print(f"Month: {latest['Month']}")
    print(f"Year: {latest['Year']}")
    print(f"Date: {date}")
    print(f"Appliances: {latest['Appliances']}")
    print(f"Predicted Total Energy Consumption: {total_prediction:.2f} kWh")
    print(f"Appliance with Maximum Energy Consumption: {max_energy_appliance} ({max_energy_consumption:.2f} kWh)")

    if _ask_yes_no("Do you want to view your historical energy consumption data? (yes/no): "):
        view_historical_data(history, total_prediction, user_id, date)

    if _ask_yes_no("Do you want to explore alternative energy sources? (yes/no): "):
        _explore_alternative_sources(
            latest['Appliances'].split(', '), total_prediction, current_energy_cost_per_kwh
        )

    if _ask_yes_no("Do you want to make another prediction? (yes/no): "):
        return True
    print("Thank you for using the Energy Consumption Prediction Project!")
    return False


def main():
    """Run prediction sessions until the user declines another one or a session aborts.

    Sessions are repeated in a loop rather than by main() calling itself, so a
    long interactive run cannot exhaust the recursion limit.
    """
    while _run_prediction_session():
        pass


if __name__ == "__main__":
    main()



Welcome to the Energy Consumption Prediction Project!

Applicable government schemes for Japan:
- Feed-in Tariffs, Renewable Energy Special Measures Act

Prediction for User ID: akku
Country: Japan
Month: Jan
Year: 2020
Date: 2020-01-09
Appliances: Fan
Predicted Total Energy Consumption: 31.95 kWh
Appliance with Maximum Energy Consumption: Fan (31.95 kWh)
Error occurred during plotting: Mime type rendering requires nbformat>=4.2.0 but it is not installed

Provider: SolarCo
Energy Source: Solar
Cost per kWh: $0.08
Applicable Appliances: Fan

Cost Savings with SolarCo:
Current Energy Cost: $3.83
Alternate Energy Cost: $2.56
Total Savings: $1.28

Environmental Impact with SolarCo:
Current Carbon Emissions: 15.98 kg CO2
Reduced Carbon Emissions: 14.38 kg CO2
Applying Visa benefits...

Visa Benefits Applied:
Discount Amount: $0.06
Cashback Amount: $0.05
Reward Points: 100
Exclusive Offers: 10% off on next billing cycle
Final Energy Cost with Visa: $2.44
Thank you for using the Energy Consum