<a target="_blank" href="https://colab.research.google.com/github/BuczynskiRafal/stormwater-analysis/blob/main/stormwater_analysis/data/catchment_classification_model/first_approach_classification.ipynb">

<a target="_blank" href="https://colab.research.google.com/github/BuczynskiRafal/stormwater-analysis/blob/main/stormwater_analysis/data/catchment_classification_model/first_approach_classification.ipynb">
    <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

# Training a model for catchment classification. 
* kategoryzacja zlewni oparta o większość dostępnych ceech zlewni z plików inp i rpt

# Imports

In [2]:
import swmmio
import pyswmm
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Normalization, Input
from tensorflow.keras.callbacks import EarlyStopping

desired_width = 500
pd.set_option("display.width", desired_width)
np.set_printoptions(linewidth=desired_width)
pd.set_option("display.max_columns", 30)

In [3]:
classes = pd.DataFrame(
    data={
        "classes": [
            "marshes",
            "arable",
            "meadows",
            "forests",
            "rural",
            "suburban_weakly_impervious",
            "suburban_highly_impervious",
            "urban_weakly_impervious",
            "urban_moderately_impervious",
            "urban_highly_impervious",
            "mountains_rocky",
            "mountains_vegetated",
        ]
    }
)
classes

Unnamed: 0,classes
0,marshes
1,arable
2,meadows
3,forests
4,rural
5,suburban_weakly_impervious
6,suburban_highly_impervious
7,urban_weakly_impervious
8,urban_moderately_impervious
9,urban_highly_impervious


# Get files

In [4]:
INP_FILE_00 = "dataset/dataset_0.inp"
INP_FILE_01 = "dataset/dataset_1.inp"
INP_FILE_02 = "dataset/dataset_2.inp"
INP_FILE_03 = "dataset/dataset_3.inp"
INP_FILE_04 = "dataset/dataset_4.inp"
INP_FILE_05 = "dataset/dataset_5.inp"
INP_FILE_06 = "dataset/dataset_6.inp"
INP_FILE_07 = "dataset/dataset_7.inp"
INP_FILE_08 = "dataset/dataset_8.inp"
INP_FILE_09 = "dataset/dataset_9.inp"
files = [INP_FILE_00, INP_FILE_01, INP_FILE_02, INP_FILE_03, INP_FILE_04, INP_FILE_05, INP_FILE_06, INP_FILE_07, INP_FILE_08, INP_FILE_09]

# Run simulation

In [5]:
for f in files:
    with pyswmm.Simulation(f) as sim:
        for step in sim:
            pass

# Read inp and rpt file as swmmio model object

In [6]:
model_00 = swmmio.Model(INP_FILE_00)
model_01 = swmmio.Model(INP_FILE_01)
model_02 = swmmio.Model(INP_FILE_02)
model_03 = swmmio.Model(INP_FILE_03)
model_04 = swmmio.Model(INP_FILE_04)
model_05 = swmmio.Model(INP_FILE_05)
model_06 = swmmio.Model(INP_FILE_06)
model_07 = swmmio.Model(INP_FILE_07)
model_08 = swmmio.Model(INP_FILE_08)
model_09 = swmmio.Model(INP_FILE_09)

models = [model_00, model_01, model_02, model_03, model_04, model_05, model_06, model_07, model_08, model_09]

## Get subcatchments data from the model

In [7]:
raw_subcatchments_00 = model_00.subcatchments.dataframe
raw_subcatchments_01 = model_01.subcatchments.dataframe
raw_subcatchments_02 = model_02.subcatchments.dataframe
raw_subcatchments_03 = model_03.subcatchments.dataframe
raw_subcatchments_04 = model_04.subcatchments.dataframe
raw_subcatchments_05 = model_05.subcatchments.dataframe
raw_subcatchments_06 = model_06.subcatchments.dataframe
raw_subcatchments_07 = model_07.subcatchments.dataframe
raw_subcatchments_08 = model_08.subcatchments.dataframe
raw_subcatchments_09 = model_09.subcatchments.dataframe

subcatchments_00 = raw_subcatchments_00.copy()
subcatchments_01 = raw_subcatchments_01.copy()
subcatchments_02 = raw_subcatchments_02.copy()
subcatchments_03 = raw_subcatchments_03.copy()
subcatchments_04 = raw_subcatchments_04.copy()
subcatchments_05 = raw_subcatchments_05.copy()
subcatchments_06 = raw_subcatchments_06.copy()
subcatchments_07 = raw_subcatchments_07.copy()
subcatchments_08 = raw_subcatchments_08.copy()
subcatchments_09 = raw_subcatchments_09.copy()

In [8]:
print(subcatchments_00.head(50))

       Raingage Outlet  Area  PercImperv  Width  PercSlope  CurbLength  N-Imperv  N-Perv  S-Imperv  S-Perv  PctZero RouteTo TotalPrecip TotalRunon TotalEvap TotalInfil ImpervRunoff PervRunoff  TotalRunoffIn  TotalRunoffMG  PeakRunoff  RunoffCoeff                                                                                                                                                                      coords
Name                                                                                                                                                                                                                                                                                                                                                                                                                              
S1    Gage15min     O1  0.57        3.54  75.45       1.94           0      0.06    0.15      1.27    5.13    14.02  OUTLET       25.00       0.00      0.00      

In [9]:
subcatchments = []
for frame in [subcatchments_00, subcatchments_01, subcatchments_02, subcatchments_03, subcatchments_04, subcatchments_05, subcatchments_06, subcatchments_07, subcatchments_08, subcatchments_09]:
    frame["TotalInfil"] = pd.to_numeric(frame["TotalInfil"], errors='coerce')
    frame["ImpervRunoff"] = pd.to_numeric(frame["ImpervRunoff"], errors='coerce')
    frame["TotalRunoffMG"] = pd.to_numeric(frame["TotalRunoffMG"], errors='coerce')
    frame["PeakRunoff"] = pd.to_numeric(frame["PeakRunoff"], errors='coerce')

    df = frame[["PercImperv", "PercSlope", "N-Imperv", "N-Perv", "S-Imperv", "S-Perv", "PctZero", "RunoffCoeff"]].copy()
    df["TotalInfil"] = frame["TotalInfil"] / frame["Area"] if frame["TotalInfil"].sum() != 0 else 0
    df["ImpervRunoff"] = frame["ImpervRunoff"] / frame["Area"] if frame["ImpervRunoff"].sum() != 0 else 0
    df["TotalRunoffMG"] = frame["TotalRunoffMG"] / frame["Area"] if frame["TotalRunoffMG"].sum() != 0 else 0
    df["PeakRunoff"] = frame["PeakRunoff"] / frame["Area"] if frame["PeakRunoff"].sum() != 0 else 0
    subcatchments.append(df)

print(subcatchments[0].head(50))

      PercImperv  PercSlope  N-Imperv  N-Perv  S-Imperv  S-Perv  PctZero  RunoffCoeff  TotalInfil  ImpervRunoff  TotalRunoffMG  PeakRunoff
Name                                                                                                                                      
S1          3.54       1.94      0.06    0.15      1.27    5.13    14.02        0.035   42.245614      1.491228       0.017544    0.000000
S10        11.26      15.01      0.02    0.06      1.33    4.65     8.45        0.122   33.600000      4.138462       0.030769    0.030769
S100       45.42       2.24      0.02    0.22      1.27    5.58    39.20        0.441   22.360656     18.049180       0.114754    0.065574
S11         0.10       3.03      0.05    0.16      1.27    4.38    14.42        0.007   85.586207      0.068966       0.000000    0.000000
S12         6.99       5.18      0.01    0.39      1.27    5.04    33.63        0.069   68.294118      5.000000       0.029412    0.000000
S13        46.30       8.00

## Get categories

In [10]:
categories_00 = model_00.inp.tags
categories_01 = model_01.inp.tags
categories_02 = model_02.inp.tags
categories_03 = model_03.inp.tags
categories_04 = model_04.inp.tags
categories_05 = model_05.inp.tags
categories_06 = model_06.inp.tags
categories_07 = model_07.inp.tags
categories_08 = model_08.inp.tags
categories_09 = model_09.inp.tags

### Add categories column to subcatchments DataFrame

In [11]:
def merge_tag(subcatchment, model):
    subcatchment.reset_index(inplace=True)
    merged = subcatchment.merge(model.inp.tags, left_on="Name", right_on="Name", how="left")
    merged.rename(columns={"Tag": "categories"}, inplace=True)
    merged.set_index("Name", inplace=True)
    return merged

merged_df_00 = merge_tag(subcatchments[0], model_00)
merged_df_01 = merge_tag(subcatchments[1], model_01)
merged_df_02 = merge_tag(subcatchments[2], model_02)
merged_df_03 = merge_tag(subcatchments[3], model_03)
merged_df_04 = merge_tag(subcatchments[4], model_04)
merged_df_05 = merge_tag(subcatchments[5], model_05)
merged_df_06 = merge_tag(subcatchments[6], model_06)
merged_df_07 = merge_tag(subcatchments[7], model_07)
merged_df_08 = merge_tag(subcatchments[8], model_08)
merged_df_09 = merge_tag(subcatchments[9], model_09)

merged = [merged_df_00, merged_df_01, merged_df_02, merged_df_03, merged_df_04, merged_df_05, merged_df_06, merged_df_07, merged_df_08, merged_df_09]

df = pd.concat(merged)
df = df.reset_index(drop=True)

# dft = df.T

# print(df.describe().T.to_excel("describe.xlsx"))
# rows_with_zero = df[df["TotalRunoffMG"] == 0]

# Wyświetl wynik
# print(f"Wiersze, w których TotalRunoffMG przyjmuje wartość 0:")
# print(rows_with_zero)

In [12]:
df[:50]

Unnamed: 0,PercImperv,PercSlope,N-Imperv,N-Perv,S-Imperv,S-Perv,PctZero,RunoffCoeff,TotalInfil,ImpervRunoff,TotalRunoffMG,PeakRunoff,categories
0,3.54,1.94,0.06,0.15,1.27,5.13,14.02,0.035,42.245614,1.491228,0.017544,0.0,arable
1,11.26,15.01,0.02,0.06,1.33,4.65,8.45,0.122,33.6,4.138462,0.030769,0.030769,mountains_vegetated
2,45.42,2.24,0.02,0.22,1.27,5.58,39.2,0.441,22.360656,18.04918,0.114754,0.065574,suburban_highly_impervious
3,0.1,3.03,0.05,0.16,1.27,4.38,14.42,0.007,85.586207,0.068966,0.0,0.0,arable
4,6.99,5.18,0.01,0.39,1.27,5.04,33.63,0.069,68.294118,5.0,0.029412,0.0,rural
5,46.3,8.0,0.01,0.25,1.27,5.28,38.83,0.451,39.382353,33.088235,0.117647,0.058824,suburban_highly_impervious
6,5.52,2.9,0.4,0.79,1.48,7.62,1.53,0.052,78.733333,4.333333,0.0,0.0,forests
7,2.82,7.82,0.16,0.43,1.27,5.84,13.15,0.027,347.0,9.714286,0.0,0.0,meadows
8,0.1,2.34,0.07,0.15,1.27,5.34,6.25,0.002,29.364706,0.023529,0.0,0.0,arable
9,5.64,3.33,0.4,0.77,1.31,7.62,4.57,0.054,36.859375,2.09375,0.015625,0.015625,forests


### Split data into features and target

In [13]:
X = df.drop('categories', axis=1)
y = df['categories']

# df = df[df["TotalRunoffMG"] != 0]
# df.head()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

y_train = pd.get_dummies(y_train)
y_test = pd.get_dummies(y_test)

print(y_train)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

     arable  forests  marshes  meadows  mountains_rocky  mountains_vegetated  rural  suburban_highly_impervious  suburban_weakly_impervious  urban_highly_impervious  urban_moderately_impervious  urban_weakly_impervious
29        0        0        0        0                1                    0      0                           0                           0                        0                            0                        0
535       0        0        0        0                0                    0      0                           0                           0                        0                            1                        0
695       0        0        0        0                0                    1      0                           0                           0                        0                            0                        0
557       0        0        0        0                0                    1      0                           0             

In [14]:
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 800 entries, 29 to 102
Data columns (total 12 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   PercImperv     800 non-null    float64
 1   PercSlope      800 non-null    float64
 2   N-Imperv       800 non-null    float64
 3   N-Perv         800 non-null    float64
 4   S-Imperv       800 non-null    float64
 5   S-Perv         800 non-null    float64
 6   PctZero        800 non-null    float64
 7   RunoffCoeff    800 non-null    float64
 8   TotalInfil     800 non-null    float64
 9   ImpervRunoff   800 non-null    float64
 10  TotalRunoffMG  800 non-null    float64
 11  PeakRunoff     800 non-null    float64
dtypes: float64(12)
memory usage: 81.2 KB


In [15]:
X_train[:50]

Unnamed: 0,PercImperv,PercSlope,N-Imperv,N-Perv,S-Imperv,S-Perv,PctZero,RunoffCoeff,TotalInfil,ImpervRunoff,TotalRunoffMG,PeakRunoff
29,44.54,22.99,0.01,0.05,1.27,5.55,10.83,0.43,15.494382,11.988764,0.11236,0.067416
535,68.36,0.78,0.01,0.16,1.33,4.69,53.81,0.668,9.609756,20.329268,0.170732,0.097561
695,14.5,10.59,0.02,0.05,1.46,4.53,9.87,0.166,188.272727,31.363636,0.0,0.090909
557,10.6,9.88,0.02,0.03,1.27,5.63,7.1,0.108,138.75,15.875,0.0,0.0625
836,19.45,8.2,0.01,0.07,1.41,5.21,7.74,0.191,71.428571,16.535714,0.035714,0.035714
596,39.01,28.29,0.02,0.05,1.27,4.43,11.78,0.395,26.285714,16.678571,0.107143,0.089286
165,82.01,10.64,0.01,0.15,1.3,4.66,47.53,0.803,7.586207,34.448276,0.206897,0.12069
918,88.91,5.66,0.01,0.13,1.42,5.74,48.06,0.864,4.0,31.304348,0.217391,0.130435
495,63.3,20.84,0.01,0.14,1.33,5.48,49.55,0.619,11.87013,20.051948,0.155844,0.090909
824,19.55,8.98,0.01,0.04,1.27,5.07,11.77,0.196,31.09375,7.328125,0.046875,0.046875


In [16]:
import pandas as pd
import numpy as np

# Twoje wartości minimalne i maksymalne dla niektórych kolumn
min_values = {
    "PercImperv": 0,   
    "PercSlope": 0,    
    "N-Imperv": 0.01, 
    "N-Perv": 0.01,   
    "S-Imperv": 1.27,  
    "S-Perv": 1.27,    
    "PctZero": 0,      
    "RunoffCoeff": 0,  
}

max_values = {
    "PercImperv": 100,
    "PercSlope": 100, 
    "N-Imperv": 0.8,  
    "N-Perv": 0.8,    
    "S-Imperv": 7.62, 
    "S-Perv": 7.62,   
    "PctZero": 100,     
    "RunoffCoeff": 1, 
}

def min_max_normalize(df, min_values, max_values):
    return (df - pd.Series(min_values)) / (pd.Series(max_values) - pd.Series(min_values))

X_train_normalized = X_train.copy()

cols_to_normalize = list(min_values.keys())
X_train_normalized[cols_to_normalize] = min_max_normalize(X_train[cols_to_normalize], min_values, max_values)

cols_to_normalize_by_data = [col for col in X_train.columns if col not in min_values]

X_train_normalized[cols_to_normalize_by_data] = (X_train[cols_to_normalize_by_data] - X_train[cols_to_normalize_by_data].min()) / (X_train[cols_to_normalize_by_data].max() - X_train[cols_to_normalize_by_data].min())

print(X_train_normalized.head(50))



normalizer = Normalization()
normalizer.adapt(np.array(X_train))


     PercImperv  PercSlope  N-Imperv    N-Perv  S-Imperv    S-Perv  PctZero  RunoffCoeff  TotalInfil  ImpervRunoff  TotalRunoffMG  PeakRunoff
29       0.4454     0.2299  0.000000  0.050633  0.000000  0.674016   0.1083        0.430    0.005319      0.005572       0.449438    0.337079
535      0.6836     0.0078  0.000000  0.189873  0.009449  0.538583   0.5381        0.668    0.002917      0.009455       0.682927    0.487805
695      0.1450     0.1059  0.012658  0.050633  0.029921  0.513386   0.0987        0.166    0.075855      0.014592       0.000000    0.454545
557      0.1060     0.0988  0.012658  0.025316  0.000000  0.686614   0.0710        0.108    0.055637      0.007381       0.000000    0.312500
836      0.1945     0.0820  0.000000  0.075949  0.022047  0.620472   0.0774        0.191    0.028154      0.007689       0.142857    0.178571
596      0.3901     0.2829  0.012658  0.050633  0.000000  0.497638   0.1178        0.395    0.009725      0.007755       0.428571    0.446429
165   

# Build the model

In [17]:
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    normalizer,
    Dense(32, activation='relu'),
    Dropout(0.1),
    Dense(32, activation='relu'),
    Dropout(0.1),

    Dense(units=y_train.shape[1], activation='softmax')
])
model.summary()

In [18]:
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=1000,
    validation_data=(X_test, y_test),
    validation_split=0.2,
    verbose=1,
    batch_size=32,
    callbacks=[EarlyStopping(monitor='val_loss', patience=20)]
)

Epoch 1/1000
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - accuracy: 0.0993 - loss: 2.4547 - val_accuracy: 0.2850 - val_loss: 2.2488
Epoch 2/1000
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.2370 - loss: 2.2376 - val_accuracy: 0.4200 - val_loss: 2.0177
Epoch 3/1000
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4169 - loss: 1.9386 - val_accuracy: 0.4500 - val_loss: 1.7371
Epoch 4/1000
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.4385 - loss: 1.7167 - val_accuracy: 0.5250 - val_loss: 1.4746
Epoch 5/1000
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4796 - loss: 1.4675 - val_accuracy: 0.5550 - val_loss: 1.2655
Epoch 6/1000
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5666 - loss: 1.2498 - val_accuracy: 0.6700 - val_loss: 1.0988
Epoch 7/1000
[1m25/25[0m 

Najlepsze parametry: {'batch_size': 32, 'epochs': 1000, 'model__dropout_rate': 0.2, 'model__learning_rate': 0.0005, 'model__neurons': 64}


In [19]:
# Evaluate ANN model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test set accuracy: {accuracy:.3f}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9648 - loss: 0.0822 
Test set accuracy: 0.975


In [20]:
# model.predict(X_test)
y_pred = model.predict(X_test)

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step


In [21]:
metrics = pd.DataFrame(history.history)
metrics['epoch'] = history.epoch
metrics

Unnamed: 0,accuracy,loss,val_accuracy,val_loss,epoch
0,0.13750,2.400486,0.285,2.248767,0
1,0.30500,2.154718,0.420,2.017721,1
2,0.41375,1.886969,0.450,1.737147,2
3,0.43750,1.647107,0.525,1.474554,3
4,0.52125,1.406454,0.555,1.265505,4
...,...,...,...,...,...
145,0.97625,0.065194,0.960,0.073515,145
146,0.98250,0.056870,0.975,0.061970,146
147,0.97375,0.067268,0.970,0.065577,147
148,0.97125,0.060697,0.960,0.066933,148


# Model evaluate

In [22]:
# import plotly.graph_objects as go
# from plotly.subplots import make_subplots

# fig = make_subplots(rows=1, cols=2)
# fig.add_trace(go.Scatter(x=metrics['epoch'], y=metrics['accuracy'], name='accuracy'), row=1, col=1)
# fig.add_trace(go.Scatter(x=metrics['epoch'], y=metrics['loss'], name='loss'), row=1, col=2)
# fig.add_trace(go.Scatter(x=metrics['epoch'], y=metrics['val_accuracy'], name='val_accuracy'), row=1, col=1)
# fig.add_trace(go.Scatter(x=metrics['epoch'], y=metrics['val_loss'], name='val_loss'), row=1, col=2)

# fig.update_xaxes(title_text='epochs')
# fig.update_yaxes(title_text='accuracy', row=1, col=1)
# fig.update_yaxes(title_text='loss', row=1, col=2)
# fig.update_layout(width=1000, title='Accuracy and Loss')
# fig.show()
import plotly.graph_objects as go
from plotly.subplots import make_subplots

fig = make_subplots(rows=2, cols=1)

fig.add_trace(go.Scatter(x=metrics['epoch'], y=metrics['accuracy'], name='Dokładność'), row=1, col=1)
fig.add_trace(go.Scatter(x=metrics['epoch'], y=metrics['val_accuracy'], name='Dokładność walidacyjna'), row=1, col=1)

fig.add_trace(go.Scatter(x=metrics['epoch'], y=metrics['loss'], name='Strata'), row=2, col=1)
fig.add_trace(go.Scatter(x=metrics['epoch'], y=metrics['val_loss'], name='Strata walidacyjna'), row=2, col=1)

# Powiększone czcionki dla tytułów i etykiet osi X
fig.update_xaxes(title_text='Epoki', row=1, col=1, showgrid=True, gridcolor='lightgray', title_font=dict(size=16), tickfont=dict(size=14))
fig.update_xaxes(title_text='Epoki', row=2, col=1, showgrid=True, gridcolor='lightgray', title_font=dict(size=16), tickfont=dict(size=14))

# Powiększone czcionki dla tytułów i etykiet osi Y
fig.update_yaxes(
    title_text='Dokładność', 
    row=1, col=1, 
    showgrid=True, 
    gridcolor='lightgray',
    dtick=0.25,
    range=[0, 1.1],
    title_font=dict(size=16),  # Czcionka tytułu osi Y
    tickfont=dict(size=14)  # Czcionka etykiet osi Y
)

fig.update_yaxes(
    title_text='Strata', 
    row=2, col=1, 
    showgrid=True, 
    gridcolor='lightgray',
    dtick=0.5,
    range=[0, max(metrics['loss'].max() + 0.1, metrics['val_loss'].max() + 0.1)],
    title_font=dict(size=16),  # Czcionka tytułu osi Y
    tickfont=dict(size=14)  # Czcionka etykiet osi Y
)

# Powiększenie czcionki legendy i tytułu wykresu
fig.update_layout(
    width=800,
    height=600,
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.1,
        xanchor="center",
        x=0.5,
        font=dict(size=14)  # Czcionka legendy
    ),
    plot_bgcolor='white',
)

fig.show()


In [24]:
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(test_acc)

0.9750000238418579


Predykcja na podstawie modelu.
* model.evaluate(y_true, y_pred) - pozwala obliczyć metryki modelu
* model.predict_classes() - pozwala zwrócić odpowiednio przewidziane klasy
* model.predict_proba(), model.predict() - pozwala zwrócić prawdopodobieństwo danej klasy

In [25]:
predictions = model.predict(X_test)
predictions

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


array([[1.7157743e-05, 3.8433620e-13, 2.6877274e-07, ..., 7.6036432e-16, 6.4893624e-10, 1.6350555e-08],
       [4.2483268e-15, 3.8059111e-11, 5.7413262e-18, ..., 1.2226273e-07, 5.4005145e-08, 4.5935198e-15],
       [9.5194125e-01, 1.0951483e-09, 4.8031203e-02, ..., 1.9530277e-09, 8.6294782e-10, 1.7570189e-09],
       ...,
       [2.6641776e-06, 2.9321649e-13, 1.7284032e-08, ..., 2.1378267e-11, 1.8284493e-09, 1.0716712e-10],
       [2.5235201e-08, 1.2008283e-12, 1.1641573e-09, ..., 1.6009830e-10, 1.7333823e-09, 4.1966802e-11],
       [2.5034760e-07, 7.5925009e-11, 8.0571398e-09, ..., 3.3327876e-09, 1.6465411e-07, 5.5160712e-09]], dtype=float32)

In [26]:
predictions_df = pd.DataFrame(predictions)
predictions_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,1.715774e-05,3.843362e-13,2.687727e-07,8.337373e-09,1.643638e-12,4.330885e-10,1.637893e-03,1.547487e-05,9.983292e-01,7.603643e-16,6.489362e-10,1.635055e-08
1,4.248327e-15,3.805911e-11,5.741326e-18,2.137520e-17,9.999996e-01,1.906970e-07,1.283642e-17,7.212943e-12,1.363020e-17,1.222627e-07,5.400515e-08,4.593520e-15
2,9.519413e-01,1.095148e-09,4.803120e-02,7.849786e-06,5.682636e-11,1.920182e-05,4.512546e-07,2.582585e-08,1.031928e-08,1.953028e-09,8.629478e-10,1.757019e-09
3,5.043832e-06,4.851818e-12,1.129062e-07,1.893648e-13,3.582298e-07,9.999945e-01,5.974197e-14,2.856955e-13,4.224947e-14,1.031481e-09,1.250542e-09,3.457730e-11
4,1.259583e-10,1.912929e-09,6.701976e-10,8.671197e-10,2.205012e-06,1.773414e-09,4.011898e-14,1.638562e-04,2.255623e-10,1.263992e-02,9.871897e-01,4.321742e-06
...,...,...,...,...,...,...,...,...,...,...,...,...
195,3.230416e-06,3.048547e-03,2.908140e-05,9.968690e-01,9.166501e-10,6.050000e-11,3.142809e-05,1.752524e-05,2.317839e-11,1.290772e-06,5.257976e-11,3.263604e-12
196,1.060602e-06,2.615870e-05,8.293337e-06,9.999465e-01,3.320251e-12,4.860028e-13,1.651151e-05,1.507848e-06,2.651875e-12,5.107479e-09,4.846716e-13,4.049627e-14
197,2.664178e-06,2.932165e-13,1.728403e-08,1.560974e-14,1.969757e-07,9.999971e-01,8.043552e-14,6.871216e-14,1.321418e-13,2.137827e-11,1.828449e-09,1.071671e-10
198,2.523520e-08,1.200828e-12,1.164157e-09,1.192851e-14,4.079234e-06,9.999959e-01,8.275032e-15,1.408819e-13,5.238808e-14,1.600983e-10,1.733382e-09,4.196680e-11


In [27]:
predictions_cls = predictions.argmax(axis=-1)
predictions_cls

array([ 8,  4,  0,  5, 10, 10,  7,  6,  9,  6,  8,  0,  0,  7,  9,  7,  5,  5,  5,  1,  0,  9,  5,  5,  5,  3, 10,  5,  7,  0, 10,  0,  9,  5,  6,  1, 11,  9, 10,  0,  0,  5,  5, 10,  7,  7, 10,  0,  5,  0,  3, 10,  5,  7, 10,  3,  5,  5,  4,  5,  4,  0,  0,  9,  1, 10,  0,  1,  9,  1,  4, 10, 11,  5,  3,  7, 10,  8,  8,  4,  0, 10,  8,  4,  5,  8,  5,  5, 11,  9,  8,  0,  5,  9,  9,  5,  1, 11,  7,  5,  4,  8,  8,  4,  6, 10,  8,  5,  0,  8,  9,  5,  8,  0,  3,  0,  0,  6, 10,  5,  0,  5,  1,
        9,  0,  8,  1,  7,  9,  0,  8,  9,  0, 10,  8,  9,  6,  0,  0,  0,  0,  6,  0,  5, 11,  8,  6,  5,  6,  1,  0,  9,  5,  7,  8, 10,  8,  8,  0,  9,  5,  3, 10,  1,  0,  5, 10,  6,  8,  4,  7,  5,  0,  3,  1,  5,  7, 10,  8,  5,  7, 10,  5,  5,  4,  5,  7,  5,  5,  6,  0,  7,  5, 10, 10,  3,  3,  5,  5,  5], dtype=int64)

In [28]:
# realne dane
inp = R"C:\Users\Dell\Documents\Git\stormwater-analysis\stormwater_analysis\catchment_classification_model\wroclaw_generated -test set.inp"
with pyswmm.Simulation(inp) as sim:
    for step in sim:
        pass

m = swmmio.Model(inp)
raw_subctch = m.subcatchments.dataframe
subctch = raw_subctch.copy()

subctch["TotalInfil"] = pd.to_numeric(subctch["TotalInfil"], errors='coerce')
subctch["ImpervRunoff"] = pd.to_numeric(subctch["ImpervRunoff"], errors='coerce')
subctch["TotalRunoffMG"] = pd.to_numeric(subctch["TotalRunoffMG"], errors='coerce')
subctch["PeakRunoff"] = pd.to_numeric(subctch["PeakRunoff"], errors='coerce')

df = subctch[["PercImperv", "PercSlope", "N-Imperv", "N-Perv", "S-Imperv", "S-Perv", "PctZero", "RunoffCoeff"]].copy()
df["TotalInfil"] = subctch["TotalInfil"] / subctch["Area"] if subctch["TotalInfil"].sum() != 0 else 0
df["ImpervRunoff"] = subctch["ImpervRunoff"] / subctch["Area"] if subctch["ImpervRunoff"].sum() != 0 else 0
df["TotalRunoffMG"] = subctch["TotalRunoffMG"] / subctch["Area"] if subctch["TotalRunoffIn"].sum() != 0 else 0
df["PeakRunoff"] = subctch["PeakRunoff"] / subctch["Area"] if subctch["PeakRunoff"].sum() != 0 else 0

t = m.inp.tags
df = merge_tag(df, m)
df = df.reset_index(drop=True)
y_real = df['categories']
X_real = df.drop("categories", axis=1)
# y_real = pd.get_dummies(df["categories"])

pred = model.predict(X_real)
pred_cls = pred.argmax(axis=-1)

index_to_class = classes["classes"].to_dict()
y_pred_classes = [index_to_class[idx] for idx in pred_cls]
y_pred_classes
comp = [(pred, real, pred == real) for pred, real in zip(y_pred_classes, y_real)]
print(comp)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step  
[('urban_highly_impervious', 'urban_moderately_impervious', False), ('urban_weakly_impervious', 'urban_weakly_impervious', True), ('urban_highly_impervious', 'urban_weakly_impervious', False), ('urban_weakly_impervious', 'urban_weakly_impervious', True), ('urban_highly_impervious', 'urban_weakly_impervious', False), ('urban_weakly_impervious', 'urban_weakly_impervious', True), ('urban_highly_impervious', 'urban_moderately_impervious', False), ('urban_weakly_impervious', 'urban_weakly_impervious', True), ('urban_highly_impervious', 'urban_moderately_impervious', False), ('urban_highly_impervious', 'urban_moderately_impervious', False), ('urban_highly_impervious', 'urban_moderately_impervious', False), ('urban_highly_impervious', 'urban_moderately_impervious', False), ('urban_weakly_impervious', 'urban_weakly_impervious', True), ('urban_highly_impervious', 'urban_highly_impervious', True), ('urban_highly_impervious'

In [41]:
l = [
    ('urban_highly_impervious', 'urban_moderately_impervious', False),
    ('urban_weakly_impervious', 'urban_weakly_impervious', True),
    ('urban_highly_impervious', 'urban_weakly_impervious', False),
    ('urban_weakly_impervious', 'urban_weakly_impervious', True),
    ('urban_weakly_impervious', 'urban_weakly_impervious', True),
    ('urban_weakly_impervious', 'urban_weakly_impervious', True),
    ('urban_moderately_impervious', 'urban_moderately_impervious', True),
    ('urban_weakly_impervious', 'urban_weakly_impervious', True),
    ('urban_moderately_impervious', 'urban_moderately_impervious', True),
    ('urban_moderately_impervious', 'urban_moderately_impervious', True),
    ('urban_moderately_impervious', 'urban_moderately_impervious', True),
    ('urban_moderately_impervious', 'urban_moderately_impervious', True),
    ('urban_weakly_impervious', 'urban_weakly_impervious', True),
    ('urban_highly_impervious', 'urban_highly_impervious', True),
    ('urban_highly_impervious', 'urban_highly_impervious', True),
    ('urban_highly_impervious', 'urban_highly_impervious', True),
    ('urban_highly_impervious', 'urban_highly_impervious', True),
    ('urban_highly_impervious', 'urban_moderately_impervious', False),
    ('urban_weakly_impervious', 'urban_weakly_impervious', True),
    ('urban_weakly_impervious', 'urban_weakly_impervious', True),
    ('urban_moderately_impervious', 'urban_moderately_impervious', True),
    ('urban_moderately_impervious', 'urban_moderately_impervious', True),
    ('urban_moderately_impervious', 'urban_moderately_impervious', True),
    ('suburban_highly_impervious', 'suburban_weakly_impervious', False),
    ('urban_highly_impervious', 'urban_highly_impervious', True),
    ('urban_highly_impervious', 'urban_moderately_impervious', False),
    ('urban_weakly_impervious', 'urban_weakly_impervious', True),
    ('urban_weakly_impervious', 'urban_weakly_impervious', True),
    ('urban_weakly_impervious', 'urban_weakly_impervious', True),
    ('suburban_weakly_impervious', 'suburban_weakly_impervious', True),
    ('suburban_weakly_impervious', 'suburban_weakly_impervious', True),
    ('urban_highly_impervious', 'urban_moderately_impervious', False),
    ('urban_highly_impervious', 'urban_highly_impervious', True),
    ('urban_moderately_impervious', 'urban_moderately_impervious', True),
    ('urban_moderately_impervious', 'urban_moderately_impervious', True),
    ('urban_moderately_impervious', 'urban_moderately_impervious', True),
    ('urban_weakly_impervious', 'urban_weakly_impervious', True)]
counter = 0
for _, _, bool in l:
    if bool:
        counter += 1
print( (counter / len(l)) * 100)

83.78378378378379


In [42]:
df = pd.DataFrame(data=l)
df.to_excel("resultspred.xlsx")

# Zapisanie modelu

In [None]:
# model.save(R'C:\Users\Dell\Documents\Git\stormwater-analysis\storm_analysis\sa\core\data\catchemnt_classifier\model.keras')


# Załadowanie modelu

In [30]:
# from tensorflow import keras
# model = keras.models.load_model(R'C:\Users\Dell\Documents\Git\stormwater-analysis\storm_analysis\sa\core\data\catchemnt_classifier\model.keras')

In [31]:
! pip install pydot





[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [32]:
!pip install graphviz 




[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [33]:
from tensorflow.keras.utils import plot_model
plot_model(model)

You must install graphviz (see instructions at https://graphviz.gitlab.io/download/) for `plot_model` to work.


In [34]:
pred = model.predict(X_test)
pred

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


array([[1.7157743e-05, 3.8433620e-13, 2.6877274e-07, ..., 7.6036432e-16, 6.4893624e-10, 1.6350555e-08],
       [4.2483268e-15, 3.8059111e-11, 5.7413262e-18, ..., 1.2226273e-07, 5.4005145e-08, 4.5935198e-15],
       [9.5194125e-01, 1.0951483e-09, 4.8031203e-02, ..., 1.9530277e-09, 8.6294782e-10, 1.7570189e-09],
       ...,
       [2.6641776e-06, 2.9321649e-13, 1.7284032e-08, ..., 2.1378267e-11, 1.8284493e-09, 1.0716712e-10],
       [2.5235201e-08, 1.2008283e-12, 1.1641573e-09, ..., 1.6009830e-10, 1.7333823e-09, 4.1966802e-11],
       [2.5034760e-07, 7.5925009e-11, 8.0571398e-09, ..., 3.3327876e-09, 1.6465411e-07, 5.5160712e-09]], dtype=float32)