## Modules

# Imports

In [1]:
import pandas as pd 
from tqdm import tqdm
from lstmModel import lstm_model, get_train_test_data
import warnings
warnings.filterwarnings("ignore")

## Data

In [2]:
path = ""
filename = "D:\PFE\Fact_Table_Rev.csv"
df= pd.read_csv(path + filename, usecols=["date_saisie", "Fk_famille_prestation","Fk_typesociete","montant_HT"])
# keep onlt records between 2006 and 2019
df = df[(df["date_saisie"] >= "2006-01-01") & (df["date_saisie"] <= "2019-12-31")]
codes_to_remove = ['DEBO','P.S','DIV','LOCAT','PEIG','BALNEO','S.COIF','PR']  


df =df[~df['Fk_famille_prestation'].isin(codes_to_remove)]
all_prestations = df["Fk_famille_prestation"].unique()

# Model

In [10]:
models_dict = {}
predictions_dict = {}
for prestation in tqdm(all_prestations):
    print("-------------------")
    print(f"Training model for prestation {prestation}")
    prestation_data = df[df["Fk_famille_prestation"] == prestation]
    train_data, test_data = get_train_test_data(prestation_data)
    model = lstm_model(seq_length=10)
    model.fit(train_data, epochs=1, batch_size=32)
    predictions=model.predict(test_data)
    model.evaluate()
    models_dict[prestation] = model
    predictions_dict[prestation] = predictions
grouped_predictions = predictions_dict.groupby('prestation')['prediction'].sum().reset_index()
merged_data1 = grouped_predictions.merge(test_data, on='prestation')

  0%|          | 0/12 [00:00<?, ?it/s]

-------------------
Training model for prestation BAR
[1m5873/5873[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 2ms/step - loss: 1115.3562
[1m358/358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m358/358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 9721.7695


  8%|▊         | 1/12 [00:18<03:20, 18.23s/it]

Mean Squared Error: 7573.68994140625
Mean Absolute Percentage Error: inf %
-------------------
Training model for prestation CAVE
[1m2484/2484[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - loss: 9308.5693
[1m202/202[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m202/202[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 13739.3936


 17%|█▋        | 2/12 [00:26<02:03, 12.36s/it]

Mean Squared Error: 56723.765625
Mean Absolute Percentage Error: inf %
-------------------
Training model for prestation RESTO
[1m3244/3244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2ms/step - loss: 378414.0000
[1m398/398[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m398/398[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 682340.6875


 25%|██▌       | 3/12 [00:38<01:49, 12.15s/it]

Mean Squared Error: 694702.9375
Mean Absolute Percentage Error: inf %
-------------------
Training model for prestation SEJOUR
[1m3153/3153[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2ms/step - loss: 10363.9658
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 5591.9302


 33%|███▎      | 4/12 [00:48<01:31, 11.47s/it]

Mean Squared Error: 5672.32421875
Mean Absolute Percentage Error: inf %
-------------------
Training model for prestation MB
[1m1921/1921[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - loss: 10.0871
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 28.0787


 42%|████▏     | 5/12 [00:55<01:07,  9.63s/it]

Mean Squared Error: 24.620450973510742
Mean Absolute Percentage Error: inf %
-------------------
Training model for prestation TEL
[1m174/174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 137.3119
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 56.8840  


 50%|█████     | 6/12 [00:57<00:42,  7.05s/it]

Mean Squared Error: 54.1636962890625
Mean Absolute Percentage Error: 111.84716159137562 %
-------------------
Training model for prestation PTD
[1m2614/2614[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - loss: 3384.0388
[1m298/298[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m298/298[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 70.7002


 58%|█████▊    | 7/12 [01:06<00:38,  7.67s/it]

Mean Squared Error: 85.25514221191406
Mean Absolute Percentage Error: inf %
-------------------
Training model for prestation LIN
[1m712/712[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 214.0932
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 949.1030 


 67%|██████▋   | 8/12 [01:09<00:25,  6.35s/it]

Mean Squared Error: 820.6195068359375
Mean Absolute Percentage Error: inf %
-------------------
Training model for prestation PISC
[1m198/198[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2292.3979
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 10953.2578  


 75%|███████▌  | 9/12 [01:11<00:15,  5.02s/it]

Mean Squared Error: 7936.869140625
Mean Absolute Percentage Error: 44.79790619764994 %
-------------------
Training model for prestation LOY
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 5880079.5000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 118ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step - loss: 2.8649e-04


 83%|████████▎ | 10/12 [01:13<00:07,  3.95s/it]

Mean Squared Error: 0.00028649086016230285
Mean Absolute Percentage Error: inf %
-------------------
Training model for prestation TORRET
[1m941/941[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 5600.5200
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m410/410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 3453.6038


 92%|█████████▏| 11/12 [01:19<00:04,  4.67s/it]

Mean Squared Error: 2933.526123046875
Mean Absolute Percentage Error: inf %
-------------------
Training model for prestation BANQUE
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 11078162.0000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 152ms/step - loss: 8458986.0000
Mean Squared Error: 8458986.0
Mean Absolute Percentage Error: 100.05741904188096 %


100%|██████████| 12/12 [01:21<00:00,  6.77s/it]


AttributeError: 'dict' object has no attribute 'groupby'

In [8]:
import pandas as pd

# Convert predictions dictionary to a DataFrame
predictions_list = []
for prestation, preds in predictions_dict.items():
    # Assuming preds is a numpy array or a list of predictions
    for pred in preds:
        predictions_list.append({'prestation': prestation, 'prediction': pred})

predictions_df = pd.DataFrame(predictions_list)

# Group by prestation and sum the predictions

merged_data1



KeyError: 'Fk_famille_prestation'