In [1]:
pip install joblib

Note: you may need to restart the kernel to use updated packages.


In [37]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

In [38]:
# Read 'data.csv' (a relative path, so it is resolved against the kernel's
# working directory) into `dataset`.
# NOTE(review): `dataset` is not referenced by any later cell visible here; the
# model below is trained on the inline sample `data` instead. Confirm whether
# this load is still needed, since it fails when the file is absent.
dataset = pd.read_csv('data.csv')

In [39]:
# Inline sample data: one entry per job, with its area, room count and a
# free-text material list of the form "<Name> <quantity>, ...".
# The double spaces after some commas are only column alignment; each material
# name is always followed by exactly one space and then its integer quantity.
data = {'Area': [100, 200, 300, 400, 500],
        'Rooms': [2, 4, 6, 8, 10],
        'MaterialList': ['Pipe 50,  Plugs 100, Joints 80,  Valves 10,  Connectors 15, Faucets 10',
                         'Pipe 100, Plugs 200, Joints 160, Valves 20,  Connectors 30, Faucets 20',
                         'Pipe 150, Plugs 250, Joints 240, Valves 30,  Connectors 45, Faucets 30',
                         'Pipe 200, Plugs 300, Joints 320, Valves 40,  Connectors 60, Faucets 40',
                         'Pipe 250, Plugs 350, Joints 400, Valves 50,  Connectors 75, Faucets 50']}

In [40]:
# Build the working frame from the inline sample `data`.
df = pd.DataFrame(data)

# Split the free-text 'MaterialList' into one float column per material by
# capturing the integer that follows "<Name> " in each row.
for material in ('Pipe', 'Plugs', 'Joints', 'Valves', 'Connectors', 'Faucets'):
    # Raw f-string: `\d` must reach the regex engine unchanged. In a normal
    # string literal it is an invalid escape sequence, which newer Python
    # versions warn about.
    df[material] = df['MaterialList'].str.extract(rf'{material} (\d+)', expand=False).astype(float)

# The text column has been fully parsed, so only numeric columns are kept.
df = df.drop('MaterialList', axis=1)

In [41]:
# Show the parsed frame: Area, Rooms and one float column per material.
print(df)

   Area  Rooms   Pipe  Plugs  Joints  Valves  Connectors  Faucets
0   100      2   50.0  100.0    80.0    10.0        15.0     10.0
1   200      4  100.0  200.0   160.0    20.0        30.0     20.0
2   300      6  150.0  250.0   240.0    30.0        45.0     30.0
3   400      8  200.0  300.0   320.0    40.0        60.0     40.0
4   500     10  250.0  350.0   400.0    50.0        75.0     50.0


In [42]:
# Random forest regressor with 100 trees; random_state is fixed so that
# repeated runs build the same forest.
model = RandomForestRegressor(n_estimators=100, random_state=42)

In [43]:
# Inputs are the job dimensions; targets are the six material quantities, all
# predicted together as one multi-column target.
feature_columns = ['Area', 'Rooms']
target_columns = ['Pipe', 'Plugs', 'Joints', 'Valves', 'Connectors', 'Faucets']

X = df[feature_columns]
y = df[target_columns]

In [44]:
# Hold out 20% of the rows for evaluation; random_state pins the shuffle so the
# same rows are held out on every run.
# NOTE(review): the sample frame has only five rows, so this presumably leaves
# 4 rows for training and 1 for testing (confirm). Any error measured on such a
# split is a very weak estimate.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [45]:
# Show the full target frame (all rows, before the train/test split).
print(y)

    Pipe  Plugs  Joints  Valves  Connectors  Faucets
0   50.0  100.0    80.0    10.0        15.0     10.0
1  100.0  200.0   160.0    20.0        30.0     20.0
2  150.0  250.0   240.0    30.0        45.0     30.0
3  200.0  300.0   320.0    40.0        60.0     40.0
4  250.0  350.0   400.0    50.0        75.0     50.0


In [46]:
# Fit the forest on the training rows; y_train carries all six material
# columns, so a single model predicts every quantity at once.
model.fit(X_train, y_train)

In [47]:
# Predict the material quantities for the held-out rows.
predictions = model.predict(X_test)

In [48]:
# Mean squared error between held-out targets and predictions. The value
# printed below is a single number covering all six target columns
# (presumably sklearn's default averaging across outputs; confirm).
mse = mean_squared_error(y_test, predictions)

In [49]:
# Report the held-out error.
print(f'Mean Squared Error: {mse}')

Mean Squared Error: 557.7304166666668


In [50]:
import joblib

In [51]:
# Persist the fitted model to 'plumbing_model.pkl' (a relative path, so it is
# written to the kernel's working directory).
# NOTE: joblib files are pickle-based; only load ones from a trusted source.
joblib.dump(model, 'plumbing_model.pkl')

['plumbing_model.pkl']

In [52]:
# A single example request with the same columns, in the same order, as X.
# NOTE(review): Area=500 / Rooms=10 duplicates the last row of the sample data,
# so this demonstrates the call but is not a prediction on an unseen input.
user_data = pd.DataFrame({'Area': [500], 'Rooms': [10]})

In [53]:
# Show the request that is about to be scored.
print(user_data)

   Area  Rooms
0   500     10


In [54]:
# Score the request; the result holds one row of predicted quantities per row
# of user_data.
user_predictions = model.predict(user_data)

In [55]:
# Print the raw prediction vector for the first (and only) request row.
print(f"Predicted Material List: {user_predictions[0]}")

Predicted Material List: [231.5  331.5  370.4   46.3   69.45  46.3 ]


In [56]:
# Print each predicted quantity next to its material name.
# The names come from the target frame `y` rather than a second hard-coded
# list, so the labels always follow the column order the model was trained on.
print("Predicted Material Quantities:")
for material, quantity in zip(y.columns, user_predictions[0]):
    print(f"{material}: {quantity}")

Predicted Material Quantities:
Pipe: 231.5
Plugs: 331.5
Joints: 370.4
Valves: 46.3
Connectors: 69.45
Faucets: 46.3
