In [1]:
import torch

# Environment report for the PyTorch/CUDA setup.
# Device-specific queries are only made when CUDA is usable: on a machine
# without a GPU (or with a CPU-only torch build) torch.cuda.current_device(),
# get_device_name() and get_device_properties() raise instead of returning.
cuda_available = torch.cuda.is_available()
print("Cuda available: ", cuda_available)
print("Cuda device count: ", torch.cuda.device_count())
if cuda_available:
    print("Cuda current device: ", torch.cuda.current_device())
    print("Cuda device name: ", torch.cuda.get_device_name(0))
    print("Cuda device capability: ", torch.cuda.get_device_capability(0))
    # Query the device properties once and report the size in several units.
    cuda_total_memory_bytes = torch.cuda.get_device_properties(0).total_memory
    print("Cuda device memory: ", cuda_total_memory_bytes)
    print("Cuda device memory: ", cuda_total_memory_bytes/1024**3, "GB")
    print("Cuda device memory: ", cuda_total_memory_bytes/1024**2, "MB")
    print("Cuda device memory: ", cuda_total_memory_bytes/1024, "KB")
# version
# torch.version.cuda is None on builds without CUDA support, so only split
# it into its components when it is actually a string.
print("Cuda version: ", torch.version.cuda)
if torch.version.cuda is not None:
    cuda_version_parts = torch.version.cuda.split(".")
    print("Cuda version: ", cuda_version_parts)
    print("Cuda version: ", cuda_version_parts[0])
    print("Cuda version: ", cuda_version_parts[1])

Cuda available:  True
Cuda device count:  1
Cuda current device:  0
Cuda device name:  NVIDIA GeForce RTX 3060
Cuda device capability:  (8, 6)
Cuda device memory:  12884246528
Cuda device memory:  11.9993896484375 GB
Cuda device memory:  12287.375 MB
Cuda device memory:  12582272.0 KB
Cuda version:  11.8
Cuda version:  ['11', '8']
Cuda version:  11
Cuda version:  8


In [2]:
# importing the required libraries
import os
import sys
# Project root on the local machine and, relative to it, the starter-notebook
# folder (presumably where the `utils` module imported below lives; confirm).
base_path = r"C:\Users\KAI\Coding\ThinkOnward_challenge\thinkOnward_TSClassification"
data_path = r"\data\building-instinct-starter-notebook\Starter notebook"
# Additional folder to import from; note this is already an absolute path.
path = r"C:\Users\KAI\Coding\ThinkOnward_challenge\thinkOnward_TSClassification\kai"

# Make both folders importable. Entries are only added when missing so that
# re-running the cell does not keep growing sys.path.
if base_path + data_path not in sys.path:
    sys.path.append(base_path + data_path)
# `path` is absolute, so it is appended as-is; prefixing it with base_path
# would produce a nonexistent location made of two joined absolute paths.
if path not in sys.path:
    sys.path.append(path)
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt
from tqdm import tqdm

from utils import (calculate_average_hourly_energy_consumption, train_model, get_pred, calculate_hierarchical_f1_score,
sample_submission_generator)

In [3]:
def load_standard_df(folder_path: str,
                     value_column: str = 'out.electricity.total.energy_consumption') -> pd.DataFrame:
    """
    Load every parquet file in a folder into one wide DataFrame.

    Each file is read, its 'timestamp' column is converted to datetime, and
    the file is pivoted (pandas `pivot_table`, default mean aggregation) so
    that the distinct timestamps become columns holding `value_column`. The
    resulting row(s) are indexed by the building id taken from the file name.

    Parameters:
    - folder_path (str): Path to the folder containing parquet files. Files
      are expected to be named '<bldg_id>.parquet' with an integer id, and to
      provide 'timestamp' and 'bldg_id' for the pivot.
    - value_column (str): Name of the column whose values fill the table.
      Defaults to the total electricity consumption column.

    Returns:
    - df (pd.DataFrame): Concatenation of the per-file pivots, indexed by
      'bldg_id', with files processed in sorted file-name order.

    Raises:
    - ValueError: If the folder contains no '.parquet' files, or a parquet
      file name does not start with an integer id.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order of the result reproducible across machines and runs.
    parquet_files = sorted(
        name for name in os.listdir(folder_path) if name.endswith(".parquet")
    )
    if not parquet_files:
        # pd.concat([]) would raise a ValueError anyway; raise the same type
        # with a message that points at the actual cause.
        raise ValueError(f"No .parquet files found in folder: {folder_path!r}")

    # One pivoted DataFrame per file, concatenated once at the end
    result_dfs = []

    for file_name in tqdm(parquet_files):
        # The building id is the part of the file name before the first dot
        bldg_id = int(file_name.split('.')[0])

        # Read the original parquet file
        df = pd.read_parquet(os.path.join(folder_path, file_name))

        # Convert 'timestamp' column to datetime before pivoting on it
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        result_df = df.pivot_table(values=value_column, index='bldg_id', columns=['timestamp'])

        # Re-index with the id derived from the file name, replacing the
        # 'bldg_id' index that came out of the pivot
        result_df['bldg_id'] = bldg_id
        result_df = result_df.set_index('bldg_id')

        result_dfs.append(result_df)

    # Concatenate all individual DataFrames into a single DataFrame
    return pd.concat(result_dfs, ignore_index=False)

In [4]:
# Folder with the training parquet files, located under the starter-notebook
# directory configured in the setup cell.
file_path = "".join((base_path, data_path, r"\building-instinct-train-data"))

# Build the wide per-building table from every parquet file in that folder.
df_features = load_standard_df(folder_path=file_path)

 46%|████▌     | 3313/7200 [07:51<08:08,  7.96it/s]   