## Combined_Log_Clean.csv
### Dataset derived from `Combined_Log_Transformed.csv`
#### All rows containing blank (missing) values removed

In [1]:
import pandas as pd

# Load the combined dataset and discard every row that contains at least
# one missing value (blank cell) in a single chained expression.
df = pd.read_csv('./CSV/Combined_Log_Transformed.csv').dropna()

# Persist the cleaned frame; index=False keeps the row index out of the file.
df.to_csv('./CSV/Combined_Log_Clean.csv', index=False)


### Row and column information

In [2]:
# Print a structural summary of the cleaned frame: index range, column count,
# dtype breakdown, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 303 entries, 73 to 446
Columns: 128 entries, date to Real Oil Prices
dtypes: float64(127), object(1)
memory usage: 305.4+ KB


### Descriptive statistics of the real oil price series after log transformation

In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) for the
# 'Real Oil Prices' column of the cleaned frame.
oil_price_summary = df['Real Oil Prices'].describe()

print("Descriptive Statistics for 'Real Oil Prices':")
print(oil_price_summary)

Descriptive Statistics for 'Real Oil Prices':
count    303.000000
mean       3.081621
std        0.461147
min        1.932086
25%        2.708091
50%        3.067687
75%        3.512631
max        3.889308
Name: Real Oil Prices, dtype: float64


### Count Missing Values (blanks and NaNs) in the dataset

In [4]:
# isna() flags every missing (NaN/None) cell as True; summing those flags
# column-wise gives the number of missing cells in each column.
missing_values_count = df.isna().sum()

# Grand total of missing cells across all columns.
total_missing_values = missing_values_count.sum()

print(f'Total missing values: {total_missing_values}')
print(missing_values_count)

Total missing values: 0
date               0
RPI                0
W875RX1            0
DPCERA3M086SBEA    0
CMRMTSPLx          0
                  ..
DTCOLNVHFNM        0
DTCTHFNM           0
INVEST             0
VIXCLSx            0
Real Oil Prices    0
Length: 128, dtype: int64


### Count Zeros

In [5]:
# eq(0) marks every cell equal to zero as True; summing the boolean flags
# down each column yields the per-column count of zeros.
zero_values_count = df.eq(0).sum()

# Grand total of zero-valued cells across all columns.
total_zero_values = zero_values_count.sum()

print(f'Total zero values: {total_zero_values}')
print(zero_values_count)

Total zero values: 34
date               0
RPI                0
W875RX1            0
DPCERA3M086SBEA    0
CMRMTSPLx          0
                  ..
DTCOLNVHFNM        0
DTCTHFNM           0
INVEST             0
VIXCLSx            0
Real Oil Prices    0
Length: 128, dtype: int64


### Count Negative Values

In [6]:
# Restrict the comparison to numeric columns so that non-numeric columns
# are never compared against a number.
numeric_df = df.select_dtypes(include='number')

# lt(0) flags each negative cell; summing the flags counts them per column.
negative_values_count = numeric_df.lt(0).sum()

# Grand total of negative cells across the numeric columns.
total_negative_values = negative_values_count.sum()

print(f'Total negative values: {total_negative_values}')
print(negative_values_count)

Total negative values: 1454
RPI                0
W875RX1            0
DPCERA3M086SBEA    0
CMRMTSPLx          0
RETAILx            0
                  ..
DTCOLNVHFNM        0
DTCTHFNM           0
INVEST             0
VIXCLSx            0
Real Oil Prices    0
Length: 127, dtype: int64
