In [27]:
from pathlib import Path

import pandas as pd

# Location of the raw stator-temperature CSV, given relative to the notebook
file_path = '../../data/raw/PreAssignmentDataSet_StatorTemp.csv'

# Read the raw CSV into a DataFrame
df = pd.read_csv(filepath_or_buffer=file_path)

In [28]:
df.isna().sum(axis=0)  # Number of missing values in each column

timestamp_utc                           0
datetime_stamp_pacific                  0
C-02_avg_winding_temp(C)                0
C-04_avg_winding_temp(C)                3
C-05_avg_winding_temp(C)                2
C-06_avg_winding_temp(C)                2
C-07_avg_winding_temp(C)                2
C-02_total_current(A)                   2
C-04_total_current(A)                   3
C-05_total_current(A)                   2
C-06_total_current(A)                   2
C-07_total_current(A)                   2
C-02_avg_cooling_water_flow(gal/min)    2
C-04_avg_cooling_water_flow(gal/min)    3
C-05_avg_cooling_water_flow(gal/min)    2
C-06_avg_cooling_water_flow(gal/min)    2
C-07_avg_cooling_water_flow(gal/min)    2
C-02_avg_cooling_water_temp(C)          2
C-04_avg_cooling_water_temp(C)          3
C-05_avg_cooling_water_temp(C)          2
C-06_avg_cooling_water_temp(C)          2
C-07_avg_cooling_water_temp(C)          2
C-02_avg_cooling_air_out_temp(C)        2
C-04_avg_cooling_air_out_temp(C)  

In [None]:
# Q1: Total number of missing cells across the whole dataset
# (per-column counts first, then summed into a single figure)
na_per_column = df.isna().sum()
total_na = na_per_column.sum()

print(f"Total NA (missing) values: {total_na}")



Total NA (missing) values: 53


In [32]:
# Q1: Replace every missing value with 0.
# Reassign instead of using inplace=True so the transformation is explicit
# and the expression stays chainable.
# NOTE(review): after this step a filled-in 0 cannot be told apart from a
# genuine 0 reading in the sensor columns — confirm that is acceptable for
# the downstream analysis.
df = df.fillna(0)

In [18]:
df.isna().sum()  # Re-count missing values per column

timestamp_utc                           0
datetime_stamp_pacific                  0
C-02_avg_winding_temp(C)                0
C-04_avg_winding_temp(C)                0
C-05_avg_winding_temp(C)                0
C-06_avg_winding_temp(C)                0
C-07_avg_winding_temp(C)                0
C-02_total_current(A)                   0
C-04_total_current(A)                   0
C-05_total_current(A)                   0
C-06_total_current(A)                   0
C-07_total_current(A)                   0
C-02_avg_cooling_water_flow(gal/min)    0
C-04_avg_cooling_water_flow(gal/min)    0
C-05_avg_cooling_water_flow(gal/min)    0
C-06_avg_cooling_water_flow(gal/min)    0
C-07_avg_cooling_water_flow(gal/min)    0
C-02_avg_cooling_water_temp(C)          0
C-04_avg_cooling_water_temp(C)          0
C-05_avg_cooling_water_temp(C)          0
C-06_avg_cooling_water_temp(C)          0
C-07_avg_cooling_water_temp(C)          0
C-02_avg_cooling_air_out_temp(C)        0
C-04_avg_cooling_air_out_temp(C)  

In [33]:
# Reshape the wide dataframe to long format: one row per (timestamp, compressor unit).
# Wide columns are expected to be named '<unit>_<measurement>', e.g. 'C-02_avg_winding_temp(C)'.

compressor_units = ['C-02', 'C-04', 'C-05', 'C-06', 'C-07']

# Wide-format column suffix -> column name used in the long dataframe.
# Insertion order determines the column order of the result.
measurement_columns = {
    'avg_winding_temp(C)': 'avg_winding_temp_C',
    'total_current(A)': 'total_current_A',
    'avg_cooling_water_flow(gal/min)': 'avg_cooling_water_flow_galpermin',
    'avg_cooling_water_temp(C)': 'avg_cooling_water_temp_C',
    'avg_cooling_air_out_temp(C)': 'avg_cooling_air_out_temp_C',
}

# Build one frame per unit and concatenate once at the end. Concatenating
# inside the loop re-copies all accumulated rows on every iteration, and
# seeding the accumulation with an empty DataFrame relies on deprecated
# concat behaviour.
unit_frames = []
for unit in compressor_units:
    # Both timestamp columns are repeated for every unit.
    unit_df = df[['timestamp_utc', 'datetime_stamp_pacific']].copy()
    unit_df['CompressorUnit_Details'] = unit
    for suffix, long_name in measurement_columns.items():
        unit_df[long_name] = df[f'{unit}_{suffix}']
    unit_frames.append(unit_df)

# Units are stacked in the order listed in compressor_units, with a fresh 0..N-1 index.
long_df = pd.concat(unit_frames, ignore_index=True)

In [34]:
long_df.info()  # Print a summary of the long dataframe: columns, non-null counts, dtypes, memory usage

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 43800 entries, 0 to 43799
Data columns (total 8 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   timestamp_utc                     43800 non-null  object 
 1   datetime_stamp_pacific            43800 non-null  object 
 2   CompressorUnit_Details            43800 non-null  object 
 3   avg_winding_temp_C                43800 non-null  float64
 4   total_current_A                   43800 non-null  float64
 5   avg_cooling_water_flow_galpermin  43800 non-null  float64
 6   avg_cooling_water_temp_C          43800 non-null  float64
 7   avg_cooling_air_out_temp_C        43800 non-null  float64
dtypes: float64(5), object(3)
memory usage: 2.7+ MB


In [26]:
long_df.head()  # Preview the first five rows (head defaults to n=5)

Unnamed: 0,timestamp_utc,datetime_stamp_pacific,CompressorUnit_Details,avg_winding_temp_C,total_current_A,avg_cooling_water_flow_galpermin),avg_cooling_water_temp_C,avg_cooling_air_out_temp_C
0,2018-01-01 08:00:00+00:00,2018-01-01 00:00:00,C-02,71.378429,8160.343625,179.440833,20.320124,30.761616
1,2018-01-01 09:00:00+00:00,2018-01-01 01:00:00,C-02,68.943909,7413.362667,176.793354,20.353998,30.574989
2,2018-01-01 10:00:00+00:00,2018-01-01 02:00:00,C-02,66.339036,6567.083083,175.072479,20.239409,30.214402
3,2018-01-01 11:00:00+00:00,2018-01-01 03:00:00,C-02,66.314878,7924.631875,174.1625,20.133054,29.90431
4,2018-01-01 12:00:00+00:00,2018-01-01 04:00:00,C-02,67.678225,8111.058042,174.318333,20.206669,29.980755


In [35]:
# Save the reshaped (long-format) data as CSV, without the row index.
processed_path = Path('../../data/processed/reshaped_compressor_data.csv')
# to_csv does not create missing directories, so make sure the target
# folder exists first (e.g. on a fresh checkout).
processed_path.parent.mkdir(parents=True, exist_ok=True)
long_df.to_csv(processed_path, index=False)