<a href="https://colab.research.google.com/github/OmShetgaonkar/TraVit/blob/master/VITHack1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install dask[xgboost] xgboost dask-ml


Collecting dask-ml
  Downloading dask_ml-2024.4.4-py3-none-any.whl.metadata (5.9 kB)
Collecting dask-glm>=0.2.0 (from dask-ml)
  Downloading dask_glm-0.3.2-py2.py3-none-any.whl.metadata (1.5 kB)
Collecting sparse>=0.7.0 (from dask-glm>=0.2.0->dask-ml)
  Downloading sparse-0.15.4-py2.py3-none-any.whl.metadata (4.5 kB)
Collecting dask-expr<1.2,>=1.1 (from dask[xgboost])
  Downloading dask_expr-1.1.14-py3-none-any.whl.metadata (2.5 kB)
INFO: pip is looking at multiple versions of dask-expr to determine which version is compatible with other requirements. This could take a while.
  Downloading dask_expr-1.1.13-py3-none-any.whl.metadata (2.5 kB)
  Downloading dask_expr-1.1.12-py3-none-any.whl.metadata (2.5 kB)
  Downloading dask_expr-1.1.11-py3-none-any.whl.metadata (2.5 kB)
  Downloading dask_expr-1.1.10-py3-none-any.whl.metadata (2.5 kB)
Downloading dask_ml-2024.4.4-py3-none-any.whl (149 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m149.8/149.8 kB[0m [31m6.7 MB/s[0m

In [13]:
import dask.dataframe as dd
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
import joblib

# Step 1: Load the Temperature Data
temperature_data = dd.read_csv('/content/ECCO-19400_19400_-sensors-data-_3_.csv', assume_missing=True)
print("Temperature Data:")
print(temperature_data.head())

# Step 2: Clean the Temperature Data
temperature_data['DateTime'] = dd.to_datetime(temperature_data['DateTime'], errors='coerce')
temperature_data = temperature_data.map_partitions(lambda df: df.ffill())
temperature_data = temperature_data.compute()  # Compute to get the cleaned data
print("Cleaned Temperature Data:")
print(temperature_data.head())

# Step 3: Define Features and Target for Temperature Data
temperature_features = ['Temperature [°C]', 'RH [%]', 'WBT_C']
# Check available columns
print("Available columns in temperature data:", temperature_data.columns)

# Update the target variable based on your data (change 'RT' to an appropriate column)
temperature_target = 'Temperature [°C]'  # Example target, adjust as needed

# Ensure target has no missing values
X_temp = temperature_data[temperature_features]
y_temp = temperature_data[temperature_target].dropna()
X_temp = X_temp.loc[y_temp.index]  # Align X with y

# Step 4: Train-Test Split for Temperature Data
X_temp_train, X_temp_test, y_temp_train, y_temp_test = train_test_split(X_temp, y_temp, test_size=0.2, random_state=42)

# Step 5: Model Training for Temperature Data
temp_model = RandomForestRegressor(n_estimators=100, random_state=42)
temp_model.fit(X_temp_train, y_temp_train)

# Step 6: Make Predictions and Evaluate the Model for Temperature Data
y_temp_pred = temp_model.predict(X_temp_test)
temp_mae = mean_absolute_error(y_temp_test, y_temp_pred)
temp_rmse = mean_squared_error(y_temp_test, y_temp_pred, squared=False)

print(f'Temperature Data - Mean Absolute Error (MAE): {temp_mae}')
print(f'Temperature Data - Root Mean Squared Error (RMSE): {temp_rmse}')

# Step 7: Save the Temperature Model
joblib.dump(temp_model, 'temperature_model.pkl')

# ----------------------------------

# Step 8: Load the Efficiency Data
efficiency_data = dd.read_csv('/content/TableData (6).csv', assume_missing=True)
print("Efficiency Data:")
print(efficiency_data.head())

# Step 9: Clean the Efficiency Data
efficiency_data['Time'] = dd.to_datetime(efficiency_data['Time'], errors='coerce')
efficiency_data = efficiency_data.map_partitions(lambda df: df.ffill())
efficiency_data = efficiency_data.compute()  # Compute to get the cleaned data
print("Cleaned Efficiency Data:")
print(efficiency_data.head())

# Step 10: Define Features and Target for Efficiency Data
efficiency_features = ['kW_RT', 'CH Load']  # Choose relevant features
# Check available columns
print("Available columns in efficiency data:", efficiency_data.columns)

# Update the target variable based on your data
efficiency_target = 'kW_Tot'  # Example target, adjust as needed

# Ensure target has no missing values
X_eff = efficiency_data[efficiency_features]
y_eff = efficiency_data[efficiency_target].dropna()
X_eff = X_eff.loc[y_eff.index]  # Align X with y

# Step 11: Train-Test Split for Efficiency Data
X_eff_train, X_eff_test, y_eff_train, y_eff_test = train_test_split(X_eff, y_eff, test_size=0.2, random_state=42)

# Step 12: Model Training for Efficiency Data
eff_model = RandomForestRegressor(n_estimators=100, random_state=42)
eff_model.fit(X_eff_train, y_eff_train)

# Step 13: Make Predictions and Evaluate the Model for Efficiency Data
y_eff_pred = eff_model.predict(X_eff_test)
eff_mae = mean_absolute_error(y_eff_test, y_eff_pred)
eff_rmse = mean_squared_error(y_eff_test, y_eff_pred, squared=False)

print(f'Efficiency Data - Mean Absolute Error (MAE): {eff_mae}')
print(f'Efficiency Data - Root Mean Squared Error (RMSE): {eff_rmse}')

# Step 14: Save the Efficiency Model
joblib.dump(eff_model, 'efficiency_model.pkl')


Temperature Data:
                  DateTime  RH [%]  Temperature [°C]  WBT_C
0  2024-03-01 00:00:42.000    68.4              30.8   26.1
1  2024-03-01 00:10:42.000    68.3              30.8   26.1
2  2024-03-01 00:20:42.000    68.3              30.8   26.0
3  2024-03-01 00:30:42.000    68.4              30.8   26.1
4  2024-03-01 00:40:42.000    68.6              30.7   26.0
Cleaned Temperature Data:
             DateTime  RH [%]  Temperature [°C]  WBT_C
0 2024-03-01 00:00:42    68.4              30.8   26.1
1 2024-03-01 00:10:42    68.3              30.8   26.1
2 2024-03-01 00:20:42    68.3              30.8   26.0
3 2024-03-01 00:30:42    68.4              30.8   26.1
4 2024-03-01 00:40:42    68.6              30.7   26.0
Available columns in temperature data: Index(['DateTime', 'RH [%]', 'Temperature [°C]', 'WBT_C'], dtype='object')
Temperature Data - Mean Absolute Error (MAE): 0.0002807547170267368
Temperature Data - Root Mean Squared Error (RMSE): 0.010912101642861036
Efficiency D

['efficiency_model.pkl']

In [14]:
from google.colab import files

# Download both models
files.download('efficiency_model.pkl')
files.download('temperature_model.pkl')  # Assuming you have this model saved as well



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [24]:
import pickle

# After training your model
with open('efficiency_model.pkl', 'wb') as file:
    pickle.dump(model, file)


In [25]:
import pickle

# Load the model
with open('efficiency_model.pkl', 'rb') as file:
    model = pickle.load(file)


In [26]:
print(type(model))


<class 'numpy.ndarray'>


In [27]:
import pickle
import pandas as pd

# Load the model
with open('efficiency_model.pkl', 'rb') as file:
    model = pickle.load(file)

# Check if the model loaded correctly
print(type(model))  # Should show your model class

# Prepare input data
input_data = pd.DataFrame({
    'RT': [201.2],
    'kW_Tot': [208.7],
    'CH Load': [39.6],
    # Add other required features...
})

# Make predictions
predictions = model.predict(input_data)

# Output predictions
print("Predicted Efficiency:", predictions)


<class 'numpy.ndarray'>


AttributeError: 'numpy.ndarray' object has no attribute 'predict'

In [29]:
import pandas as pd

# Assuming X_train is a DataFrame
print(X_train.isnull().sum())  # This will show the count of missing values per column



Temperature [°C]    3552
RH [%]              3552
WBT_C               3552
RT                     0
CH Load                0
dtype: int64


In [30]:
# Fill with the mean of each column
X_train = X_train.fillna(X_train.mean())


In [31]:
from sklearn.impute import SimpleImputer

imputer = SimpleImputer(strategy='mean')  # Or 'median', 'most_frequent', etc.
X_train = imputer.fit_transform(X_train)




In [32]:
from sklearn.linear_model import LinearRegression

model = LinearRegression()
model.fit(X_train, y_train)

# Save the trained model
with open('efficiency_model.pkl', 'wb') as file:
    pickle.dump(model, file)


In [33]:
import pandas as pd

# Assuming X_temp is a DataFrame containing your temperature features
print(X_temp.isnull().sum())  # Count of missing values per column


RH [%]    0
WBT_C     0
dtype: int64


In [34]:
# Fill with the mean of each column
X_temp = X_temp.fillna(X_temp.mean())


In [35]:
from sklearn.linear_model import LinearRegression  # or whichever model you are using

# Train the model
temp_model = LinearRegression()
temp_model.fit(X_temp, y_temp)

# Save the trained model
with open('temperature_model.pkl', 'wb') as file:
    pickle.dump(temp_model, file)


In [36]:
print(X_temp.isnull().sum())  # Check again for NaN values


RH [%]    0
WBT_C     0
dtype: int64


In [37]:
from sklearn.linear_model import LinearRegression
import pickle

# Example of training the temperature model
temp_model = LinearRegression()
temp_model.fit(X_temp, y_temp)

# Save the trained model
with open('temperature_model.pkl', 'wb') as file:
    pickle.dump(temp_model, file)


In [38]:
# Load the trained temperature model
with open('temperature_model.pkl', 'rb') as file:
    temp_model = pickle.load(file)

# Prepare new input data for predictions
input_data = pd.DataFrame({
    'RH [%]': [68.4],
    'WBT_C': [26.1],
    # Add any other features you need...
})

# Make predictions
predictions = temp_model.predict(input_data)

# Output predictions
print("Predicted Temperature:", predictions)


Predicted Temperature: [30.86466838]


In [39]:
import pickle

# Load the trained efficiency model
with open('efficiency_model.pkl', 'rb') as file:
    efficiency_model = pickle.load(file)


In [40]:
import pandas as pd

# Example of preparing input data for predictions
input_data = pd.DataFrame({
    'RT': [200.0],          # Example values; replace with actual data
    'kW_Tot': [210.0],
    'kW_RT': [1.05],
    'CH Load': [40.0],
    'CH1': [0.0],
    'CH2': [1.0],
    'CH3': [0.0],
    'CH4': [0.0],
    # Include other features as necessary...
})


In [42]:
# Prepare input data with the correct features
input_data = pd.DataFrame({
    'RT': [200.0],          # Example value for RT
    'kW_Tot': [210.0],      # Example value for kW_Tot
    # Include only the features used in training...
})


In [43]:
print(input_data.shape)  # Should match (n_samples, n_features)


(1, 2)


In [44]:
# Make predictions
predictions = efficiency_model.predict(input_data)

# Output predictions
print("Predicted Efficiency:", predictions)


Predicted Efficiency: [772.98527279]




In [45]:
import pickle
from google.colab import files

# Save the efficiency model
with open('efficiency_model.pkl', 'wb') as file:
    pickle.dump(efficiency_model, file)

# Save the temperature model (assuming you have it trained as well)
with open('temperature_model.pkl', 'wb') as file:
    pickle.dump(temperature_model, file)

# Download the models
files.download('efficiency_model.pkl')
files.download('temperature_model.pkl')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [46]:
print(efficiency_model.coef_)
print(efficiency_model.intercept_)



[0.0530027  3.32637765]
63.84542601508002


In [47]:
import pickle
from google.colab import files

# Save the efficiency model again
with open('efficiency_model.pkl', 'wb') as file:
    pickle.dump(efficiency_model, file)

# Download the model
files.download('efficiency_model.pkl')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [48]:
from sklearn.externals import joblib  # If using older versions of sklearn
import joblib

# Save the efficiency model using joblib
joblib.dump(efficiency_model, 'efficiency_model.joblib')

# Download the model
files.download('efficiency_model.joblib')


ImportError: cannot import name 'joblib' from 'sklearn.externals' (/usr/local/lib/python3.10/dist-packages/sklearn/externals/__init__.py)

In [49]:
import joblib

# Save the efficiency model using joblib
joblib.dump(efficiency_model, 'efficiency_model.joblib')

# Download the model
from google.colab import files
files.download('efficiency_model.joblib')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [50]:
import joblib
import pandas as pd

# Load the efficiency model
efficiency_model = joblib.load('/content/efficiency_model.pkl')

# Create new input data for prediction
# Adjust this according to your feature set (RT, kW_Tot, etc.)
input_data = pd.DataFrame({
    'RT': [200.0],     # Example value for RT
    'kW_Tot': [210.0]  # Example value for kW_Tot
})

# Make predictions
predictions = efficiency_model.predict(input_data)

# Output predictions
print("Predicted Efficiency:", predictions)


Predicted Efficiency: [772.98527279]




In [51]:
from google.colab import files

# Download the efficiency model
files.download('/content/efficiency_model.pkl')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [52]:
import dask.dataframe as dd
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
import joblib

# Step 1: Load the Temperature Data
temperature_data = dd.read_csv('/content/ECCO-19400_19400_-sensors-data-_3_.csv', assume_missing=True)
print("Temperature Data:")
print(temperature_data.head())

# Step 2: Clean the Temperature Data
temperature_data['DateTime'] = dd.to_datetime(temperature_data['DateTime'], errors='coerce')
temperature_data = temperature_data.map_partitions(lambda df: df.ffill())
temperature_data = temperature_data.compute()  # Compute to get the cleaned data
print("Cleaned Temperature Data:")
print(temperature_data.head())

# Step 3: Define Features and Target for Temperature Data
temperature_features = ['Temperature [°C]', 'RH [%]', 'WBT_C']
# Check available columns
print("Available columns in temperature data:", temperature_data.columns)

# Update the target variable based on your data (change 'RT' to an appropriate column)
temperature_target = 'Temperature [°C]'  # Example target, adjust as needed

# Ensure target has no missing values
X_temp = temperature_data[temperature_features]
y_temp = temperature_data[temperature_target].dropna()
X_temp = X_temp.loc[y_temp.index]  # Align X with y

# Step 4: Train-Test Split for Temperature Data
X_temp_train, X_temp_test, y_temp_train, y_temp_test = train_test_split(X_temp, y_temp, test_size=0.2, random_state=42)

# Step 5: Model Training for Temperature Data
temp_model = RandomForestRegressor(n_estimators=100, random_state=42)
temp_model.fit(X_temp_train, y_temp_train)

# Step 6: Make Predictions and Evaluate the Model for Temperature Data
y_temp_pred = temp_model.predict(X_temp_test)
temp_mae = mean_absolute_error(y_temp_test, y_temp_pred)
temp_rmse = mean_squared_error(y_temp_test, y_temp_pred, squared=False)

print(f'Temperature Data - Mean Absolute Error (MAE): {temp_mae}')
print(f'Temperature Data - Root Mean Squared Error (RMSE): {temp_rmse}')

# Step 7: Save the Temperature Model
joblib.dump(temp_model, 'temperature_model.pkl')

# ----------------------------------

# Step 8: Load the Efficiency Data
efficiency_data = dd.read_csv('/content/TableData (6).csv', assume_missing=True)
print("Efficiency Data:")
print(efficiency_data.head())

# Step 9: Clean the Efficiency Data
efficiency_data['Time'] = dd.to_datetime(efficiency_data['Time'], errors='coerce')
efficiency_data = efficiency_data.map_partitions(lambda df: df.ffill())
efficiency_data = efficiency_data.compute()  # Compute to get the cleaned data
print("Cleaned Efficiency Data:")
print(efficiency_data.head())

# Step 10: Define Features and Target for Efficiency Data
efficiency_features = ['kW_RT', 'CH Load']  # Choose relevant features
# Check available columns
print("Available columns in efficiency data:", efficiency_data.columns)

# Update the target variable based on your data
efficiency_target = 'kW_Tot'  # Example target, adjust as needed

# Ensure target has no missing values
X_eff = efficiency_data[efficiency_features]
y_eff = efficiency_data[efficiency_target].dropna()
X_eff = X_eff.loc[y_eff.index]  # Align X with y

# Step 11: Train-Test Split for Efficiency Data
X_eff_train, X_eff_test, y_eff_train, y_eff_test = train_test_split(X_eff, y_eff, test_size=0.2, random_state=42)

# Step 12: Model Training for Efficiency Data
eff_model = RandomForestRegressor(n_estimators=100, random_state=42)
eff_model.fit(X_eff_train, y_eff_train)

# Step 13: Make Predictions and Evaluate the Model for Efficiency Data
y_eff_pred = eff_model.predict(X_eff_test)
eff_mae = mean_absolute_error(y_eff_test, y_eff_pred)
eff_rmse = mean_squared_error(y_eff_test, y_eff_pred, squared=False)

print(f'Efficiency Data - Mean Absolute Error (MAE): {eff_mae}')
print(f'Efficiency Data - Root Mean Squared Error (RMSE): {eff_rmse}')

# Step 14: Save the Efficiency Model
joblib.dump(eff_model, 'efficiency_model.pkl')

Temperature Data:
                  DateTime  RH [%]  Temperature [°C]  WBT_C
0  2024-03-01 00:00:42.000    68.4              30.8   26.1
1  2024-03-01 00:10:42.000    68.3              30.8   26.1
2  2024-03-01 00:20:42.000    68.3              30.8   26.0
3  2024-03-01 00:30:42.000    68.4              30.8   26.1
4  2024-03-01 00:40:42.000    68.6              30.7   26.0
Cleaned Temperature Data:
             DateTime  RH [%]  Temperature [°C]  WBT_C
0 2024-03-01 00:00:42    68.4              30.8   26.1
1 2024-03-01 00:10:42    68.3              30.8   26.1
2 2024-03-01 00:20:42    68.3              30.8   26.0
3 2024-03-01 00:30:42    68.4              30.8   26.1
4 2024-03-01 00:40:42    68.6              30.7   26.0
Available columns in temperature data: Index(['DateTime', 'RH [%]', 'Temperature [°C]', 'WBT_C'], dtype='object')
Temperature Data - Mean Absolute Error (MAE): 0.0002807547170267368
Temperature Data - Root Mean Squared Error (RMSE): 0.010912101642861036
Efficiency D

['efficiency_model.pkl']

In [53]:
from google.colab import files

# Download the efficiency model
files.download('/content/efficiency_model.pkl')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [54]:
import joblib
import pandas as pd

# Load the efficiency model
efficiency_model = joblib.load('/content/efficiency_model.pkl')

# Create new input data for prediction
# Adjust these values according to your feature set
input_data = pd.DataFrame({
    'RT': [200.0],     # Example value for RT
    'kW_Tot': [210.0]  # Example value for kW_Tot
})

# Make predictions
predictions = efficiency_model.predict(input_data)

# Output predictions
print("Predicted Efficiency:", predictions[0])



ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- RT
- kW_Tot
Feature names seen at fit time, yet now missing:
- CH Load
- kW_RT


In [55]:
import joblib
import pandas as pd

# Load the efficiency model
efficiency_model = joblib.load('/content/efficiency_model.pkl')

# Create new input data for prediction with correct features
input_data = pd.DataFrame({
    'CH Load': [39.6],  # Example value for CH Load
    'RT': [200.0],      # Example value for RT
    'kW_RT': [1.037],   # Example value for kW_RT
    'kW_Tot': [210.0],  # Example value for kW_Tot
    # Include any other features your model requires
})

# Make predictions
predictions = efficiency_model.predict(input_data)

# Output predictions
print("Predicted Efficiency:", predictions[0])


ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- RT
- kW_Tot


In [56]:
import joblib
import pandas as pd

# Load the efficiency model
efficiency_model = joblib.load('/content/efficiency_model.pkl')

# Create new input data for prediction with correct features
input_data = pd.DataFrame({
    'CH Load': [39.6],  # Example value for CH Load
    'RT': [200.0],      # Example value for RT
    'kW_RT': [1.037],   # Example value for kW_RT
})

# Make predictions
predictions = efficiency_model.predict(input_data)

# Output predictions
print("Predicted Efficiency:", predictions[0])


ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- RT


In [57]:
from sklearn.linear_model import LinearRegression

model = LinearRegression()
model.fit(X_train, y_train)

# Save the trained model
with open('efficiency_model.pkl', 'wb') as file:
    pickle.dump(model, file)


In [58]:
import dask.dataframe as dd
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
import joblib

# Step 1: Load the Temperature Data
temperature_data = dd.read_csv('/content/ECCO-19400_19400_-sensors-data-_3_.csv', assume_missing=True)
print("Temperature Data:")
print(temperature_data.head())

# Step 2: Clean the Temperature Data
temperature_data['DateTime'] = dd.to_datetime(temperature_data['DateTime'], errors='coerce')
temperature_data = temperature_data.map_partitions(lambda df: df.ffill())
temperature_data = temperature_data.compute()  # Compute to get the cleaned data
print("Cleaned Temperature Data:")
print(temperature_data.head())

# Step 3: Define Features and Target for Temperature Data
temperature_features = ['Temperature [°C]', 'RH [%]', 'WBT_C']
# Check available columns
print("Available columns in temperature data:", temperature_data.columns)

# Update the target variable based on your data (change 'RT' to an appropriate column)
temperature_target = 'Temperature [°C]'  # Example target, adjust as needed

# Ensure target has no missing values
X_temp = temperature_data[temperature_features]
y_temp = temperature_data[temperature_target].dropna()
X_temp = X_temp.loc[y_temp.index]  # Align X with y

# Step 4: Train-Test Split for Temperature Data
X_temp_train, X_temp_test, y_temp_train, y_temp_test = train_test_split(X_temp, y_temp, test_size=0.2, random_state=42)

# Step 5: Model Training for Temperature Data
temp_model = RandomForestRegressor(n_estimators=100, random_state=42)
temp_model.fit(X_temp_train, y_temp_train)

# Step 6: Make Predictions and Evaluate the Model for Temperature Data
y_temp_pred = temp_model.predict(X_temp_test)
temp_mae = mean_absolute_error(y_temp_test, y_temp_pred)
temp_rmse = mean_squared_error(y_temp_test, y_temp_pred, squared=False)

print(f'Temperature Data - Mean Absolute Error (MAE): {temp_mae}')
print(f'Temperature Data - Root Mean Squared Error (RMSE): {temp_rmse}')

# Step 7: Save the Temperature Model
joblib.dump(temp_model, 'temperature_model.pkl')

# ----------------------------------

# Step 8: Load the Efficiency Data
efficiency_data = dd.read_csv('/content/TableData (6).csv', assume_missing=True)
print("Efficiency Data:")
print(efficiency_data.head())

# Step 9: Clean the Efficiency Data
efficiency_data['Time'] = dd.to_datetime(efficiency_data['Time'], errors='coerce')
efficiency_data = efficiency_data.map_partitions(lambda df: df.ffill())
efficiency_data = efficiency_data.compute()  # Compute to get the cleaned data
print("Cleaned Efficiency Data:")
print(efficiency_data.head())

# Step 10: Define Features and Target for Efficiency Data
efficiency_features = ['kW_RT', 'CH Load']  # Choose relevant features
# Check available columns
print("Available columns in efficiency data:", efficiency_data.columns)

# Update the target variable based on your data
efficiency_target = 'kW_Tot'  # Example target, adjust as needed

# Ensure target has no missing values
X_eff = efficiency_data[efficiency_features]
y_eff = efficiency_data[efficiency_target].dropna()
X_eff = X_eff.loc[y_eff.index]  # Align X with y

# Step 11: Train-Test Split for Efficiency Data
X_eff_train, X_eff_test, y_eff_train, y_eff_test = train_test_split(X_eff, y_eff, test_size=0.2, random_state=42)

# Step 12: Model Training for Efficiency Data
eff_model = RandomForestRegressor(n_estimators=100, random_state=42)
eff_model.fit(X_eff_train, y_eff_train)

# Step 13: Make Predictions and Evaluate the Model for Efficiency Data
y_eff_pred = eff_model.predict(X_eff_test)
eff_mae = mean_absolute_error(y_eff_test, y_eff_pred)
eff_rmse = mean_squared_error(y_eff_test, y_eff_pred, squared=False)

print(f'Efficiency Data - Mean Absolute Error (MAE): {eff_mae}')
print(f'Efficiency Data - Root Mean Squared Error (RMSE): {eff_rmse}')

# Step 14: Save the Efficiency Model
joblib.dump(eff_model, 'efficiency_model.pkl')

Temperature Data:
                  DateTime  RH [%]  Temperature [°C]  WBT_C
0  2024-03-01 00:00:42.000    68.4              30.8   26.1
1  2024-03-01 00:10:42.000    68.3              30.8   26.1
2  2024-03-01 00:20:42.000    68.3              30.8   26.0
3  2024-03-01 00:30:42.000    68.4              30.8   26.1
4  2024-03-01 00:40:42.000    68.6              30.7   26.0
Cleaned Temperature Data:
             DateTime  RH [%]  Temperature [°C]  WBT_C
0 2024-03-01 00:00:42    68.4              30.8   26.1
1 2024-03-01 00:10:42    68.3              30.8   26.1
2 2024-03-01 00:20:42    68.3              30.8   26.0
3 2024-03-01 00:30:42    68.4              30.8   26.1
4 2024-03-01 00:40:42    68.6              30.7   26.0
Available columns in temperature data: Index(['DateTime', 'RH [%]', 'Temperature [°C]', 'WBT_C'], dtype='object')
Temperature Data - Mean Absolute Error (MAE): 0.0002807547170267368
Temperature Data - Root Mean Squared Error (RMSE): 0.010912101642861036
Efficiency D

['efficiency_model.pkl']

In [59]:
import pickle
import pandas as pd

# Load the efficiency model
with open('efficiency_model.pkl', 'rb') as file:
    efficiency_model = pickle.load(file)

# Create new input data for prediction
# Ensure these columns match the ones used in X_train
input_data = pd.DataFrame({
    'RT': [200.0],      # Example value for RT
    'kW_Tot': [208.7],  # Example value for kW_Tot
    # Add any other features that were used in X_train
})

# Make predictions
predictions = efficiency_model.predict(input_data)

# Output predictions
print("Predicted Efficiency:", predictions[0])


AttributeError: 'numpy.ndarray' object has no attribute 'predict'

In [60]:
from sklearn.linear_model import LinearRegression
import pickle

# Train your model
model = LinearRegression()
model.fit(X_train, y_train)

# Save the trained model
with open('efficiency_model.pkl', 'wb') as file:
    pickle.dump(model, file)


In [61]:
# Load the efficiency model
with open('efficiency_model.pkl', 'rb') as file:
    efficiency_model = pickle.load(file)

# Check if the loaded model is indeed a model
print(type(efficiency_model))


<class 'sklearn.linear_model._base.LinearRegression'>


In [62]:
# Create input data for prediction
input_data = pd.DataFrame({
    'RT': [200.0],      # Replace with actual values
    'kW_Tot': [208.7],  # Replace with actual values
})

# Make predictions
predictions = efficiency_model.predict(input_data)

# Output predictions
print("Predicted Efficiency:", predictions[0])


Predicted Efficiency: 768.6609818490685




In [63]:
from google.colab import files

# Download the efficiency model
files.download('efficiency_model.pkl')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [64]:
import dask.dataframe as dd
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
import joblib

# Step 1: Load the Temperature Data
temperature_data = dd.read_csv('/content/ECCO-19400_19400_-sensors-data-_3_.csv', assume_missing=True)
print("Temperature Data:")
print(temperature_data.head())

# Step 2: Clean the Temperature Data
temperature_data['DateTime'] = dd.to_datetime(temperature_data['DateTime'], errors='coerce')
temperature_data = temperature_data.map_partitions(lambda df: df.ffill())
temperature_data = temperature_data.compute()  # Compute to get the cleaned data
print("Cleaned Temperature Data:")
print(temperature_data.head())

# Step 3: Define Features and Target for Temperature Data
temperature_features = ['Temperature [°C]', 'RH [%]', 'WBT_C']
# Check available columns
print("Available columns in temperature data:", temperature_data.columns)

# Update the target variable based on your data (change 'RT' to an appropriate column)
temperature_target = 'Temperature [°C]'  # Example target, adjust as needed

# Ensure target has no missing values
X_temp = temperature_data[temperature_features]
y_temp = temperature_data[temperature_target].dropna()
X_temp = X_temp.loc[y_temp.index]  # Align X with y

# Step 4: Train-Test Split for Temperature Data
X_temp_train, X_temp_test, y_temp_train, y_temp_test = train_test_split(X_temp, y_temp, test_size=0.2, random_state=42)

# Step 5: Model Training for Temperature Data
temp_model = RandomForestRegressor(n_estimators=100, random_state=42)
temp_model.fit(X_temp_train, y_temp_train)

# Step 6: Make Predictions and Evaluate the Model for Temperature Data
y_temp_pred = temp_model.predict(X_temp_test)
temp_mae = mean_absolute_error(y_temp_test, y_temp_pred)
temp_rmse = mean_squared_error(y_temp_test, y_temp_pred, squared=False)

print(f'Temperature Data - Mean Absolute Error (MAE): {temp_mae}')
print(f'Temperature Data - Root Mean Squared Error (RMSE): {temp_rmse}')

# Step 7: Save the Temperature Model
joblib.dump(temp_model, 'temperature_model.pkl')

# ----------------------------------

# Step 8: Load the Efficiency Data
efficiency_data = dd.read_csv('/content/TableData (6).csv', assume_missing=True)
print("Efficiency Data:")
print(efficiency_data.head())

# Step 9: Clean the Efficiency Data
efficiency_data['Time'] = dd.to_datetime(efficiency_data['Time'], errors='coerce')
efficiency_data = efficiency_data.map_partitions(lambda df: df.ffill())
efficiency_data = efficiency_data.compute()  # Compute to get the cleaned data
print("Cleaned Efficiency Data:")
print(efficiency_data.head())

# Step 10: Define Features and Target for Efficiency Data
efficiency_features = ['kW_RT', 'CH Load']  # Choose relevant features
# Check available columns
print("Available columns in efficiency data:", efficiency_data.columns)

# Update the target variable based on your data
efficiency_target = 'kW_Tot'  # Example target, adjust as needed

# Ensure target has no missing values
X_eff = efficiency_data[efficiency_features]
y_eff = efficiency_data[efficiency_target].dropna()
X_eff = X_eff.loc[y_eff.index]  # Align X with y

# Step 11: Train-Test Split for Efficiency Data
X_eff_train, X_eff_test, y_eff_train, y_eff_test = train_test_split(X_eff, y_eff, test_size=0.2, random_state=42)

# Step 12: Model Training for Efficiency Data
eff_model = RandomForestRegressor(n_estimators=100, random_state=42)
eff_model.fit(X_eff_train, y_eff_train)

# Step 13: Make Predictions and Evaluate the Model for Efficiency Data
y_eff_pred = eff_model.predict(X_eff_test)
eff_mae = mean_absolute_error(y_eff_test, y_eff_pred)
eff_rmse = mean_squared_error(y_eff_test, y_eff_pred, squared=False)

print(f'Efficiency Data - Mean Absolute Error (MAE): {eff_mae}')
print(f'Efficiency Data - Root Mean Squared Error (RMSE): {eff_rmse}')

# Step 14: Save the Efficiency Model
joblib.dump(eff_model, 'efficiency_model.pkl')

Temperature Data:
                  DateTime  RH [%]  Temperature [°C]  WBT_C
0  2024-03-01 00:00:42.000    68.4              30.8   26.1
1  2024-03-01 00:10:42.000    68.3              30.8   26.1
2  2024-03-01 00:20:42.000    68.3              30.8   26.0
3  2024-03-01 00:30:42.000    68.4              30.8   26.1
4  2024-03-01 00:40:42.000    68.6              30.7   26.0
Cleaned Temperature Data:
             DateTime  RH [%]  Temperature [°C]  WBT_C
0 2024-03-01 00:00:42    68.4              30.8   26.1
1 2024-03-01 00:10:42    68.3              30.8   26.1
2 2024-03-01 00:20:42    68.3              30.8   26.0
3 2024-03-01 00:30:42    68.4              30.8   26.1
4 2024-03-01 00:40:42    68.6              30.7   26.0
Available columns in temperature data: Index(['DateTime', 'RH [%]', 'Temperature [°C]', 'WBT_C'], dtype='object')
Temperature Data - Mean Absolute Error (MAE): 0.0002807547170267368
Temperature Data - Root Mean Squared Error (RMSE): 0.010912101642861036
Efficiency D

['efficiency_model.pkl']

In [65]:
import joblib

# Train your model
model = LinearRegression()
model.fit(X_train, y_train)

# Save the trained model using Joblib
joblib.dump(model, 'efficiency_model.joblib')


['efficiency_model.joblib']

In [66]:
print("Coefficients:", model.coef_)
print("Intercept:", model.intercept_)


Coefficients: [0.0530027  3.32637765]
Intercept: 63.84542601508002


In [67]:
from google.colab import files

# Download the model
files.download('efficiency_model.joblib')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [68]:
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Predict on the training set (or use a validation set if you have one)
predictions = model.predict(X_train)

# Calculate performance metrics
mae = mean_absolute_error(y_train, predictions)
rmse = np.sqrt(mean_squared_error(y_train, predictions))
r2 = r2_score(y_train, predictions)

# Output the performance metrics
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Root Mean Squared Error (RMSE): {rmse}')
print(f'R-squared: {r2}')


Mean Absolute Error (MAE): 3.3799517012135984
Root Mean Squared Error (RMSE): 6.046575223634643
R-squared: 0.9923168018545561


In [69]:
import pandas as pd

# Sample input data (adjust these values based on your feature columns)
sample_input = {
    'RT': [180, 190, 200],      # Replace with actual values
    'kW_Tot': [205, 210, 215]   # Replace with actual values
}

# Create a DataFrame
input_data = pd.DataFrame(sample_input)

# Make predictions
predictions = efficiency_model.predict(input_data)

# Output predictions
for i, pred in enumerate(predictions):
    print(f"Prediction for input {i + 1}: {pred}")


Prediction for input 1: 755.2933304695036
Prediction for input 2: 772.4552457482248
Prediction for input 3: 789.617161026946




In [70]:
from google.colab import files

# Download the efficiency model
files.download('efficiency_model.pkl')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>