In [1]:
import pandas as pd
import os

In [2]:
# Gather the name of every CSV file located in the source directory
directory = r"../data_jsk/original/"
file_names = [entry for entry in os.listdir(directory) if entry.endswith(".csv")]

In [3]:
# Function to transform datafiles
def transform_data(directory, file_name):
    """Aggregate one home's humidity readings into daily statistics.

    Reads ``file_name`` from ``directory``, relabels its two columns as
    ``datetime`` and ``humidity``, and resamples the readings to calendar days.

    Parameters
    ----------
    directory : str
        Folder containing the CSV file. A trailing path separator is optional.
    file_name : str
        CSV file name of the form ``home<id>.csv``. The file must contain
        exactly two columns (timestamp first, humidity second); its header row
        is replaced by the names above.

    Returns
    -------
    pandas.DataFrame
        One row per daily bin with the columns ``datetime``, ``mean_h``,
        ``min_h``, ``max_h`` and ``homeid`` (the text between ``home`` and the
        first ``.`` of ``file_name``, kept as a string).
    """
    # os.path.join works whether or not `directory` ends with a separator,
    # unlike plain string concatenation.
    readings = pd.read_csv(os.path.join(directory, file_name))

    # Rename columns to datetime and humidity
    readings.columns = ['datetime', 'humidity']

    # Convert datetime to datetime object so it can drive the resampling
    readings['datetime'] = pd.to_datetime(readings['datetime'])

    # Compute mean, min and max per day directly on the humidity column;
    # no helper copies of the column are needed.
    daily = (
        readings.resample('D', on='datetime')['humidity']
        .agg(['mean', 'min', 'max'])
        .rename(columns={'mean': 'mean_h', 'min': 'min_h', 'max': 'max_h'})
        .reset_index()
    )

    # Tag every row with the home identifier taken from the file name
    daily['homeid'] = file_name.split('home')[1].split('.')[0]

    return daily

In [4]:
# export dataframe to csv file and delete original file
def export_file(df, file_name, output_dir='../data_jsk/humidity_transformed/',
                source_dir=None, delete_original=True):
    """Write a transformed dataframe to CSV and optionally remove its source file.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to export; written without the index.
    file_name : str
        Name used for the exported file and for locating the original file.
    output_dir : str, optional
        Folder the CSV is written to. It is created if it does not exist.
    source_dir : str or None, optional
        Folder holding the original file. When ``None`` the notebook-level
        ``directory`` variable is used, which keeps existing two-argument
        calls working.
    delete_original : bool, optional
        When ``True`` (default) the original file is deleted after a
        successful export. Pass ``False`` to keep the raw data so the
        notebook can be re-run.

    Returns
    -------
    None
    """
    # to_csv does not create missing folders, so make sure the target exists
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    df.to_csv(os.path.join(output_dir, file_name), index=False)
    print(f'{file_name} exported successfully!')

    if delete_original:
        if source_dir is None:
            # Backward-compatible fallback to the notebook-level variable
            source_dir = directory

        original_path = os.path.join(source_dir, file_name)

        if os.path.exists(original_path):
            os.remove(original_path)
            print(f'Original {file_name} deleted successfully!')

    # Always close the log entry for this file, whether or not a file was deleted
    print('----------------------------------------------\n')

In [5]:
# Transform every source file in turn and export its daily summary
for file_number, file_name in enumerate(file_names, start=1):
    print(f'Processing file {file_number} of {len(file_names)}')
    df = transform_data(directory, file_name)
    export_file(df, file_name)

Processing file 1 of 237
home299.csv exported successfully!
Original home299.csv deleted successfully!
----------------------------------------------

Processing file 2 of 237
home266.csv exported successfully!
Original home266.csv deleted successfully!
----------------------------------------------

Processing file 3 of 237
home306.csv exported successfully!
Original home306.csv deleted successfully!
----------------------------------------------

Processing file 4 of 237
home69.csv exported successfully!
Original home69.csv deleted successfully!
----------------------------------------------

Processing file 5 of 237
home110.csv exported successfully!
Original home110.csv deleted successfully!
----------------------------------------------

Processing file 6 of 237
home96.csv exported successfully!
Original home96.csv deleted successfully!
----------------------------------------------

Processing file 7 of 237
home82.csv exported successfully!
Original home82.csv deleted successfully!

### Combine all the humidity data in one dataset

In [10]:
# Humidity directory
dir_hum = r"../data_jsk/humidity_transformed/"

# os.listdir returns entries in arbitrary order; sorting the names keeps the
# row order of the combined dataset identical from run to run.
file_names = sorted(name for name in os.listdir(dir_hum) if name.endswith(".csv"))

In [12]:
# Load each transformed per-home CSV into its own dataframe
dfs = [pd.read_csv(os.path.join(dir_hum, file)) for file in file_names]

# Stack the per-home dataframes into one frame with a fresh 0..n-1 index
df = pd.concat(dfs, ignore_index=True)

# Persist the combined dataset as a single CSV (index column omitted)
df.to_csv("../data_jsk/humidity_data.csv", index=False)