### Imports

In [124]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error, mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

### Load your data

In [125]:
file_url = 'data/data.csv'
data = pd.read_csv(file_url)
print(data.shape)
data.head()

(500, 10)


Unnamed: 0,Asset ID,Asset Type,Floor,Room,Installation Date,Manufacturer,Operational Time (hrs),Work Orders,Repairs,Last Serviced Date
0,1,Elevator,7,103,1/6/2020,Manufacturer_4,39313,0,1,6/19/2023
1,2,Plumbing System,6,109,6/11/2022,Manufacturer_2,20012,0,1,1/6/2023
2,3,Fire Alarm,3,104,7/24/2019,Manufacturer_4,21546,10,5,9/24/2022
3,4,Elevator,1,105,1/4/2021,Manufacturer_4,47038,1,1,5/23/2023
4,5,Plumbing System,2,104,11/28/2022,Manufacturer_1,20022,10,3,1/29/2023


In [126]:
reference_date = pd.Timestamp.now()

data['Installation Date'] = pd.to_datetime(data['Installation Date'], format='%m/%d/%Y')
data['Days Since Installation'] = abs((data['Installation Date'] - reference_date).dt.days)

data['Last Serviced Date'] = pd.to_datetime(data['Last Serviced Date'], format='%m/%d/%Y')
data['Days Since Last Serviced'] = abs((data['Last Serviced Date'] - reference_date).dt.days)

data.head(20)


Unnamed: 0,Asset ID,Asset Type,Floor,Room,Installation Date,Manufacturer,Operational Time (hrs),Work Orders,Repairs,Last Serviced Date,Days Since Installation,Days Since Last Serviced
0,1,Elevator,7,103,2020-01-06,Manufacturer_4,39313,0,1,2023-06-19,1350,90
1,2,Plumbing System,6,109,2022-06-11,Manufacturer_2,20012,0,1,2023-01-06,463,254
2,3,Fire Alarm,3,104,2019-07-24,Manufacturer_4,21546,10,5,2022-09-24,1516,358
3,4,Elevator,1,105,2021-01-04,Manufacturer_4,47038,1,1,2023-05-23,986,117
4,5,Plumbing System,2,104,2022-11-28,Manufacturer_1,20022,10,3,2023-01-29,293,231
5,6,Fire Alarm,2,103,2021-07-29,Manufacturer_3,1602,9,4,2022-10-03,780,349
6,7,Fire Alarm,6,108,2020-08-23,Manufacturer_2,15650,2,3,2023-01-13,1120,247
7,8,HVAC,2,106,2021-06-03,Manufacturer_2,33932,1,4,2023-08-09,836,39
8,9,Fire Alarm,6,108,2023-07-30,Manufacturer_5,24332,5,4,2023-02-13,49,216
9,10,Elevator,7,101,2019-07-24,Manufacturer_4,18722,10,2,2023-05-29,1516,111


### Split the data into training and testing sets

In [127]:
features = ['Asset Type', 'Manufacturer', 'Operational Time (hrs)', 'Days Since Installation', 'Days Since Last Serviced']
target = ['Repairs']

X_train, X_test, Y_train, Y_test = train_test_split(data[features], data[target], test_size=0.2, random_state=42)

print(
    "Using %d samples for training and %d for validation"
    % (len(X_train), len(X_test))
)

Using 400 samples for training and 100 for validation


In [128]:
X_train.head()

Unnamed: 0,Asset Type,Manufacturer,Operational Time (hrs),Days Since Installation,Days Since Last Serviced
249,HVAC,Manufacturer_1,6764,1477,358
433,Elevator,Manufacturer_5,29385,83,305
19,Electrical Panel,Manufacturer_2,29859,1636,103
322,Fire Alarm,Manufacturer_2,32376,815,353
332,Plumbing System,Manufacturer_3,1478,787,298


### Build a preprocessor for categorical and numerical data

In [129]:
num_features = ['Operational Time (hrs)', 'Days Since Installation', 'Days Since Last Serviced']
cat_features = ['Asset Type', 'Manufacturer']

preprocessor = ColumnTransformer(
    transformers=[
        ('num', 'passthrough', num_features),
        ('cat', OneHotEncoder(), cat_features)])

X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

In [130]:
print(X_train)

[[6.7640e+03 1.4770e+03 3.5800e+02 ... 0.0000e+00 0.0000e+00 0.0000e+00]
 [2.9385e+04 8.3000e+01 3.0500e+02 ... 0.0000e+00 0.0000e+00 1.0000e+00]
 [2.9859e+04 1.6360e+03 1.0300e+02 ... 0.0000e+00 0.0000e+00 0.0000e+00]
 ...
 [5.9900e+02 1.1450e+03 2.2300e+02 ... 1.0000e+00 0.0000e+00 0.0000e+00]
 [2.2612e+04 1.5460e+03 8.2000e+01 ... 0.0000e+00 0.0000e+00 0.0000e+00]
 [2.9016e+04 1.6500e+02 2.1200e+02 ... 0.0000e+00 0.0000e+00 1.0000e+00]]


### Train the model

In [131]:
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dense(1, activation='linear')  # This is a regression task, so we use a linear activation function in the output layer
])

model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

history = model.fit(X_train, Y_train, epochs=100, batch_size=32, validation_split=0.1, verbose=1)

Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

In [132]:
# model = LinearRegression()
# model.fit(X_train, Y_train)

### Predict on the validation set and evaluate

In [133]:
# Y_pred = model.predict(X_test)
# mae = mean_absolute_error(Y_test, Y_pred)
# rmse = np.sqrt(mean_squared_error(Y_test, Y_pred))

# print("Mean Absolute Error:", mae)
# print("Root Mean Squared Error:", rmse)

test_loss, test_mae = model.evaluate(X_test, Y_test, verbose=0)
print(f"Test MAE: {test_mae}")

Test MAE: 51.43522644042969


In [134]:
print(Y_pred[0:5])
Y_test.head()

[[2.40502467]
 [2.64762331]
 [2.61975442]
 [2.34194628]
 [2.96638862]]


Unnamed: 0,Repairs
361,4
73,1
374,5
155,1
104,1
