In [97]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
import datetime as date

In [98]:

# Read the workbook (pandas loads the first sheet by default) and discard the
# 'Unnamed: 0' column in the same expression.
data = pd.read_excel("Taux_Remplissage_ComCom_Nebbiu.xlsx").drop(columns=['Unnamed: 0'])

# 'Unnamed: 1' is the column whose first cell reads "date" in the preview below.
dates = data['Unnamed: 1']

# Keep this bare expression last so the notebook renders the preview.
data.head()

Unnamed: 0,Unnamed: 1,poubelle 1,Unnamed: 3,Unnamed: 4,poubelle 2,Unnamed: 6,Unnamed: 7,poubelle 3,Unnamed: 9,Unnamed: 10,...,Unnamed: 13,poubelle 5,Unnamed: 15,Unnamed: 16,poubelle 6,Unnamed: 18,Unnamed: 19,poubelle 7,Unnamed: 21,Unnamed: 22
0,date,GPS,remplissage,coeff touriste,GPS,remplissage,coeff touriste,GPS,remplissage,coeff touriste,...,coeff touriste,GPS,remplissage,coeff touriste,GPS,remplissage,coeff touriste,GPS,remplissage,coeff touriste
1,2023-01-01 00:00:00,"42°36'39""N 9°21'14""E",5,1,"42°36'44""N 9°21'24""E",18,1,"42°36'51""N 9°21'17""E",21,1,...,1,"42°37'01""N 9°21'20""E",9,1,"42°36'30""N 9°21'29""E",13,1,"42°36'07""N 9°21'35""E",11,1
2,2023-01-02 00:00:00,,40,1,,51,1,,46,1,...,1,,51,1,,45,1,,40,1
3,2023-01-03 00:00:00,,0,1,,0,1,,0,1,...,1,,0,1,,0,1,,0,1
4,2023-01-04 00:00:00,,37,1,,25,1,,30,1,...,1,,38,1,,42,1,,49,1


In [99]:

def create_df(file_name: str, num_poubelle: int, village: str) -> pd.DataFrame:
    """Extract the readings of a single bin from one sheet of the workbook.

    Args:
        file_name: Path of the Excel workbook.
        num_poubelle: Bin number; the fill level is read from column
            ``'Unnamed: {3 * num_poubelle}'`` and the coefficient from the
            column right after it (``3 * num_poubelle + 1``).
        village: Name of the sheet to load.

    Returns:
        A frame with the columns ``date``, ``remplissage`` and ``coeff``
        (the last two as floats). Rows containing any missing value are
        dropped; the original row labels are kept, so the index may have gaps.

    Raises:
        KeyError: if one of the expected columns is absent from the sheet.
        ValueError: if a fill/coefficient cell cannot be converted to float.
    """
    sheet = pd.read_excel(file_name, sheet_name=village)

    fill_label = f'Unnamed: {num_poubelle*3}'
    coeff_label = f'Unnamed: {num_poubelle*3+1}'

    # The first row of the numeric columns is skipped before the float cast
    # (presumably it carries the textual sub-header - verify against the
    # sheet). The date column is taken whole: its first row ends up with NaN
    # in the two other columns after alignment and is removed by dropna().
    columns = {
        'date': sheet['Unnamed: 1'],
        'remplissage': sheet[fill_label].iloc[1:].astype(float),
        'coeff': sheet[coeff_label].iloc[1:].astype(float),
    }
    return pd.DataFrame(columns).dropna()


def scale_and_predict(features, scaler, model, df=None):
    """Predict from one leading feature plus the most recent lag readings.

    The single input row handed to the model is
    ``[features[0], remplissage_3d_ago, remplissage_4d_ago, remplissage_5d_ago]``
    where the three lag values come from the last row of ``df``.

    Args:
        features: Sequence; only its first element is used, as the first
            model input. (``create_model`` fits on the non-date, non-target
            columns followed by the lags, so this is presumably the
            coefficient - confirm with the caller.)
        scaler: Fitted transformer exposing ``transform``.
        model: Fitted estimator exposing ``predict``.
        df: Frame holding the ``remplissage_{3,4,5}d_ago`` columns. When
            omitted, the module-level ``df`` is used, which is what this
            function used to read implicitly; passing it explicitly removes
            the dependency on notebook execution order.

    Returns:
        Whatever ``model.predict`` returns for the single scaled row.

    Raises:
        NameError: if ``df`` is omitted and no module-level ``df`` exists.
        IndexError: if ``features`` is empty.
        KeyError: if a lag column is missing from ``df``.
    """
    if df is None:
        # Backward-compatible fallback to the notebook-global frame.
        try:
            df = globals()['df']
        except KeyError:
            raise NameError("name 'df' is not defined") from None

    lag_features = [f'remplissage_{i}d_ago' for i in range(3, 6)]
    latest_lags = [df[lag_feature].values[-1] for lag_feature in lag_features]
    input_features = [features[0]] + latest_lags

    # transform() receives a one-row batch; the reshape keeps the result in
    # the (1, n_features) layout that predict() is given.
    input_features_scaled = scaler.transform([input_features]).reshape(1, -1)
    return model.predict(input_features_scaled)


def create_model(df: pd.DataFrame) -> LinearRegression:
    """Fit a linear regression of ``remplissage`` on the other columns plus lags.

    Side effect: three columns ``remplissage_3d_ago``, ``remplissage_4d_ago``
    and ``remplissage_5d_ago`` are written into ``df`` itself.

    Args:
        df: Frame with at least ``date`` and ``remplissage`` columns. Every
            column other than those two (after the lag columns are added) is
            used as a predictor.

    Returns:
        The fitted ``LinearRegression``. The ``StandardScaler`` used to
        standardise the predictors is local to this function and is not
        returned.
    """
    target = df['remplissage']

    # shift() moves values by rows, so "3d_ago" equals three days only when
    # the frame has exactly one row per day. Missing leading lags become 0.
    for lag in (3, 4, 5):
        df[f'remplissage_{lag}d_ago'] = target.shift(lag).fillna(0)

    predictors = df.drop(columns=['date', 'remplissage'])
    predictors_scaled = StandardScaler().fit_transform(predictors)

    regression = LinearRegression()
    regression.fit(predictors_scaled, target)
    return regression


def train_model(df: pd.DataFrame, model: LinearRegression, date_: date.datetime) -> bool:
    """Decide whether the bin described by ``df`` meets a collection rule on ``date_``.

    Despite its name, this function trains nothing and never uses ``model``;
    the parameter is kept so existing calls keep working.

    Rules are evaluated in order on the first row dated ``date_``; the first
    match prints its label and returns ``True``:

    1. fill level >= 75
    2. ``coeff`` == 3
    3. ``coeff`` == 2 and the reading just before the latest emptying is non-zero
    4. ``coeff`` == 1, fill level >= 50 and the readings from the latest
       emptying up to ``date_`` sum to more than 20

    An "emptying" is a row whose ``remplissage`` equals 0. Only emptyings
    dated on or before ``date_`` are considered: previously the last emptying
    of the whole frame was used, so any later emptying made the rule-4 window
    empty and the rule could never fire. Rows are assumed to be in
    chronological order (the latest emptying is the last matching row).

    Args:
        df: Frame with ``date``, ``remplissage`` and ``coeff`` columns.
        model: Unused.
        date_: Day to evaluate.

    Returns:
        ``True`` when a rule matches. ``False`` when none matches, when no
        reading exists 3 to 5 days before ``date_``, or when ``df`` has no
        row for ``date_`` (this last case used to raise ``IndexError``).
    """
    target = pd.to_datetime(date_)
    # Normalise once so comparisons behave the same whether the column holds
    # datetime64 values or Python objects; unparsable cells become NaT and
    # therefore never match.
    dates = pd.to_datetime(df['date'], errors='coerce')
    fill = df['remplissage']

    lag_dates = [target - date.timedelta(days=lag) for lag in range(3, 6)]
    if not dates.isin(lag_dates).any():
        print("No past data available for scaling and prediction.")
        return False

    target_rows = (dates == target).to_numpy().nonzero()[0]
    if len(target_rows) == 0:
        print("No data available for the requested date.")
        return False
    target_pos = int(target_rows[0])
    fill_today = fill.iloc[target_pos]
    coeff_today = df['coeff'].iloc[target_pos]

    # Positions (not labels) are used from here on: the frame's index can have
    # gaps, which made the former `label - 1` lookup raise KeyError.
    emptied_rows = ((fill == 0) & (dates <= target)).to_numpy().nonzero()[0]
    last_emptied_pos = int(emptied_rows[-1]) if len(emptied_rows) else None

    if fill_today >= 75:
        print("Condition 1: True")
        return True

    if coeff_today == 3:
        print("Condition 2: True")
        return True

    if coeff_today == 2:
        # Needs a reading before the latest emptying; without one the rule
        # cannot be evaluated and is treated as not met.
        has_previous = last_emptied_pos is not None and last_emptied_pos > 0
        if has_previous and fill.iloc[last_emptied_pos - 1] != 0:
            print("Condition 3: True")
            return True

    if coeff_today == 1:
        # Window runs from the latest emptying (or from the first record when
        # no emptying is known) up to and including the evaluated day.
        since_emptied = dates <= target
        if last_emptied_pos is not None:
            since_emptied &= dates >= dates.iloc[last_emptied_pos]
        # NOTE(review): this is a plain sum of fill levels and includes
        # today's reading, so with non-negative data it exceeds 20 whenever
        # fill_today >= 50. Confirm whether a difference was intended.
        if fill_today >= 50 and fill[since_emptied].sum() > 20:
            print("Condition 4: True")
            return True

    print("No condition met.")
    return False

# Build the frame for bin 1 of the chosen sheet.
# NOTE(review): the sheet name ends with a space - presumably it matches the
# workbook tab exactly; confirm before "fixing" it.
df = create_df(
    file_name="Taux_Remplissage_ComCom_Nebbiu.xlsx",
    num_poubelle=1,
    village="santu petro di tenda ",
)

# create_model() also adds the lag columns to `df` in place.
model = create_model(df=df)

# Keep this call last so the notebook displays the returned boolean.
train_model(df=df, model=model, date_=date.datetime(2023, 1, 26))

No condition met.


False