In [1]:
import pandas as pd
import os

In [2]:
# Collect the name of every CSV file found in the raw-data folder
directory = r"../data_jsk/original/"
file_names = [entry for entry in os.listdir(directory) if entry.endswith(".csv")]

In [3]:
# Function to transform datafiles
def transform_data(directory, file_name):
    """Aggregate one home's raw temperature readings into daily statistics.

    Parameters
    ----------
    directory : str
        Folder containing the raw CSV file; a trailing path separator is
        optional.
    file_name : str
        CSV file name such as ``home299.csv``. The text between ``home`` and
        the first following dot is stored as the home identifier.

    Returns
    -------
    pandas.DataFrame
        One row per day with the columns ``datetime``, ``mean_t``, ``min_t``,
        ``max_t`` and ``homeid`` (kept as a string).
    """
    # os.path.join copes with a directory given with or without a trailing
    # separator; plain string concatenation built a wrong path without one.
    df = pd.read_csv(os.path.join(directory, file_name))

    # The file must hold exactly two columns: a timestamp and a reading
    # (assigning two names raises otherwise).
    df.columns = ['datetime', 'temperature']
    df['datetime'] = pd.to_datetime(df['datetime'])

    # Aggregate the single temperature column three ways per day, instead of
    # duplicating it into helper columns first.
    daily = (
        df.resample('D', on='datetime')['temperature']
        .agg(['mean', 'min', 'max'])
        .rename(columns={'mean': 'mean_t', 'min': 'min_t', 'max': 'max_t'})
        .reset_index()
    )
    daily['homeid'] = file_name.split('home')[1].split('.')[0]

    return daily

In [4]:
# export dataframe to csv file and delete original file
def export_file(df, file_name, source_dir=None,
                output_dir='../data_jsk/temperature_transformed/',
                delete_original=True):
    """Write a transformed frame to CSV and optionally remove its raw source.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to write; the index is not written.
    file_name : str
        Name used for the exported file and for locating the raw file.
    source_dir : str, optional
        Folder holding the raw file. When omitted, the notebook-level
        ``directory`` variable is used, as before.
    output_dir : str, optional
        Destination folder. It is created if it does not exist yet.
    delete_original : bool, optional
        When True (the default, matching the previous behaviour) the raw file
        is deleted after a successful export. Pass False to keep the raw data
        so the notebook can be re-run.
    """
    # Create the destination first so a fresh checkout does not fail on to_csv.
    os.makedirs(output_dir, exist_ok=True)
    df.to_csv(os.path.join(output_dir, file_name), index=False)
    print(f'{file_name} exported successfully!')

    if not delete_original:
        return

    if source_dir is None:
        # Backward-compatible fallback to the global defined in an earlier cell.
        source_dir = directory
    original_path = os.path.join(source_dir, file_name)

    # Deletion is destructive: it only happens after the export above succeeded.
    if os.path.exists(original_path):
        os.remove(original_path)
        print(f'Original {file_name} deleted successfully!')
        print('----------------------------------------------\n')

In [5]:
# Transform and export every raw file, reporting progress along the way
for status, file_name in enumerate(file_names, start=1):
    print(f'Processing file {status} of {len(file_names)}')
    export_file(transform_data(directory, file_name), file_name)

Processing file 1 of 254
home299.csv exported successfully!
Original home299.csv deleted successfully!
----------------------------------------------

Processing file 2 of 254
home266.csv exported successfully!
Original home266.csv deleted successfully!
----------------------------------------------

Processing file 3 of 254
home272.csv exported successfully!
Original home272.csv deleted successfully!
----------------------------------------------

Processing file 4 of 254
home306.csv exported successfully!
Original home306.csv deleted successfully!
----------------------------------------------

Processing file 5 of 254
home69.csv exported successfully!
Original home69.csv deleted successfully!
----------------------------------------------

Processing file 6 of 254
home110.csv exported successfully!
Original home110.csv deleted successfully!
----------------------------------------------

Processing file 7 of 254
home96.csv exported successfully!
Original home96.csv deleted successfu

### Combine all the temperature data in one dataset

In [6]:
# Folder holding the per-home daily temperature CSV files
# (the name `dir_hum` is kept because a later cell refers to it)
dir_hum = r"../data_jsk/temperature_transformed/"

# Keep only the CSV files found in that folder
file_names = [entry for entry in os.listdir(dir_hum) if entry.endswith(".csv")]

In [7]:
# Load every per-home CSV into its own dataframe
dfs = [pd.read_csv(os.path.join(dir_hum, csv_name)) for csv_name in file_names]

# Stack them into a single frame with a fresh running index
df = pd.concat(dfs, ignore_index=True)

# Persist the combined dataset as one CSV file (index not written)
df.to_csv("../data_jsk/temperature_data.csv", index=False)

In [8]:
# Show the combined dataframe as the cell output
df

Unnamed: 0,datetime,mean_t,min_t,max_t,homeid
0,2018-02-20,172.944122,164.0,182.0,299
1,2018-02-21,166.892131,152.0,176.0,299
2,2018-02-22,167.959710,153.0,176.0,299
3,2018-02-23,164.040397,142.0,177.0,299
4,2018-02-24,159.414724,136.0,181.0,299
...,...,...,...,...,...
71010,2018-06-26,208.935495,206.0,213.0,280
71011,2018-06-27,210.504754,207.0,215.0,280
71012,2018-06-28,213.664361,208.0,220.0,280
71013,2018-06-29,210.950648,208.0,214.0,280
