In [75]:
#%pip install scikit-learn

In [76]:
import os
import pandas as pd
import sys


def _decimal_comma_to_numeric(series, errors="raise"):
    """Parse a column to numbers, accepting ',' as the decimal separator.

    pandas may already have inferred a numeric dtype (e.g. when every value is
    a whole number); such a column has no ``.str`` accessor, so the comma
    replacement is applied to non-numeric (text) columns only.
    """
    if not pd.api.types.is_numeric_dtype(series):
        series = series.str.replace(",", ".", regex=False)
    return pd.to_numeric(series, errors=errors)


def read_csv(filename="water_consumption_updated.csv", delimiter=";"):
    """Load a delimited file into a DataFrame and normalise its numeric columns.

    Parameters
    ----------
    filename : str
        Path of the file to read.
    delimiter : str
        Field separator passed to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed table. When a "Consumption" column is present it is parsed
        as a number (decimal comma accepted, invalid values raise) and
        "Month", "Year" and "Day" are coerced to numbers (invalid -> NaN).
        When an "Area" column is present it is parsed the same way, with
        invalid values coerced to NaN.

    Raises
    ------
    SystemExit
        If ``filename`` does not exist (a message is printed first).
    KeyError
        If "Consumption" is present but "Month", "Year" or "Day" is missing.
    ValueError
        If the table has zero or exactly one data row.
    """
    if not os.path.exists(filename):
        print(f"File '{filename}' does not exist.")
        sys.exit(1)

    df = pd.read_csv(filename, delimiter=delimiter)

    if "Consumption" in df.columns:
        # No errors="coerce" here: a malformed consumption value is an error.
        df["Consumption"] = _decimal_comma_to_numeric(df["Consumption"])
        for column in ("Month", "Year", "Day"):
            df[column] = pd.to_numeric(df[column], errors="coerce")

    if "Area" in df.columns:
        df["Area"] = _decimal_comma_to_numeric(df["Area"], errors="coerce")

    # NOTE(review): len(df) == 1 is one *data* row (the header is not counted),
    # so single-row files are rejected too; kept as-is for compatibility.
    if df.empty or len(df) == 1:
        raise ValueError("Empty file or file with only header.")
    return df


In [77]:
def calculate_monthly_cost(data, rate=0.7, threshold=1000, surcharge=1.15):
    """Add a "MonthlyCost" column computed from "Consumption" with a two-tier tariff.

    Consumption up to ``threshold`` is charged at ``rate`` per unit; every unit
    above ``threshold`` is charged at ``rate * surcharge``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a numeric "Consumption" column. It is modified in place.
    rate : float
        Price per unit of consumption in the base tier.
    threshold : float
        Consumption level above which the surcharge applies.
    surcharge : float
        Multiplier applied to ``rate`` for consumption above ``threshold``.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, now carrying the "MonthlyCost" column.
    """
    def tiered_cost(consumption):
        if consumption <= threshold:
            return consumption * rate
        # Base tier charged in full, plus the surcharged excess.
        return threshold * rate + (consumption - threshold) * rate * surcharge

    data["MonthlyCost"] = data["Consumption"].apply(tiered_cost)
    return data


def average_monthly_cost(data):
    """Return the mean of "MonthlyCost" for each "Park".

    The result is a Series indexed by park name.
    """
    costs_by_park = data.groupby("Park")["MonthlyCost"]
    return costs_by_park.mean()


In [78]:
from sklearn.linear_model import LinearRegression


def predict_cost(new_park_area=55,
                 consumption_file="water_consumption_updated.csv",
                 area_file="Area.csv"):
    """Predict the average monthly water cost of a park from its area.

    Fits a linear regression of each park's average monthly cost against its
    area and evaluates it at ``new_park_area``.

    Parameters
    ----------
    new_park_area : float
        Area of the park to predict for, in the same unit as the "Area" column.
    consumption_file : str
        Consumption file with "Park", "Year", "Month" and "Consumption" columns.
    area_file : str
        File with "Park" and "Area" columns.

    Returns
    -------
    float
        The predicted average monthly cost (also printed).

    Raises
    ------
    ValueError
        If no park has both a valid area and consumption data.
    """
    consumption = read_csv(consumption_file)
    areas = read_csv(area_file)

    # Total consumption per park and calendar month. Grouping on the numeric
    # Year/Month columns avoids building date strings, which breaks when the
    # columns are floats; rows whose year or month is NaN are left out by groupby.
    monthly = (
        consumption.groupby(["Park", "Year", "Month"])["Consumption"]
        .sum()
        .reset_index()
    )

    # Monthly cost per park, then its average over all months.
    monthly = calculate_monthly_cost(monthly)
    average_costs = average_monthly_cost(monthly).reset_index()
    average_costs.columns = ["Park", "AverageCost"]

    # One row per park with both an area and an average cost; areas that could
    # not be parsed are NaN and would make the regression fail.
    data = pd.merge(areas, average_costs, on="Park").dropna(
        subset=["Area", "AverageCost"]
    )
    if data.empty:
        raise ValueError("No park has both a valid area and consumption data.")

    # Simple linear regression: AverageCost ~ Area.
    X = data["Area"].values.reshape(-1, 1)
    y = data["AverageCost"]
    model = LinearRegression().fit(X, y)

    predicted_cost = float(model.predict([[new_park_area]])[0])
    print(f"The predicted average monthly cost for a {new_park_area}-hectare park is {predicted_cost:.2f}€")
    return predicted_cost

In [79]:
def menu():
    """Print the menu, read one choice from the user and run the matching action.

    Choice "1" runs the cost prediction; anything else reports an invalid choice.
    """
    banner = (
        "-x-x-x-x-x-x- MENU -x-x-x-x-x-x-x-",
        "1. Average park cost",
    )
    for line in banner:
        print(line)

    selection = input("Enter your choice: ")

    if selection == "1":
        predict_cost()
        return
    print("Invalid choice")


# Start the interactive menu; this waits for a choice to be typed at the prompt.
menu()

-x-x-x-x-x-x- MENU -x-x-x-x-x-x-x-
1. Average park cost
The predicted average monthly cost for a 55-hectare park is 1934.17€
