In [3]:
import os
import pandas as pd
import json

# Helper function to read a JSON file and return a DataFrame
def json_to_df(filepath):
    """Load one price-history JSON file as a single-column DataFrame.

    The JSON document must have a top-level ``'pricePoints'`` entry whose
    records carry at least a ``'timeStamp'`` field (interpreted as epoch
    milliseconds) and a ``'last'`` field.

    Parameters
    ----------
    filepath : str or os.PathLike
        Path to the JSON file to read.

    Returns
    -------
    pandas.DataFrame
        A frame indexed by ``'timeStamp'`` (converted to datetimes) with one
        column holding the ``'last'`` values. The column is named after the
        file's base name with its extension removed.

    Raises
    ------
    KeyError
        If ``'pricePoints'``, ``'timeStamp'`` or ``'last'`` is missing.
    """
    # Binary mode lets json.load detect the encoding (UTF-8/16/32, with or
    # without BOM) instead of depending on the platform's locale default.
    with open(filepath, 'rb') as f:
        data = json.load(f)

    # Extract 'pricePoints' and convert to DataFrame
    df = pd.DataFrame(data['pricePoints'])

    # Convert 'timeStamp' (epoch milliseconds) to datetime
    df['timeStamp'] = pd.to_datetime(df['timeStamp'], unit='ms')

    # splitext strips whatever extension the file has, rather than assuming
    # the name ends in exactly the five characters ".json".
    column_name = os.path.splitext(os.path.basename(filepath))[0]

    # Index by timestamp and keep only the 'last' column, renamed to the
    # file's base name.
    return df.set_index('timeStamp')[['last']].rename(columns={'last': column_name})

# Directory containing the JSON files
directory = '.'  # Current directory; adjust if necessary

# Collect the JSON file paths in sorted order. os.listdir() returns entries in
# arbitrary order, so sorting keeps the CSV's column order reproducible
# across runs and machines.
json_files = sorted(
    os.path.join(directory, f) for f in os.listdir(directory) if f.endswith('.json')
)

# Fail with a clear message instead of pandas' generic
# "No objects to concatenate" when the directory holds no JSON files.
if not json_files:
    raise ValueError(f"No .json files found in {os.path.abspath(directory)!r}")

# Load each JSON file into its own single-column DataFrame
dfs = [json_to_df(file_path) for file_path in json_files]

# Concatenate all DataFrames horizontally, aligning on index
all_data = pd.concat(dfs, axis=1)

# Save the consolidated DataFrame to a CSV file
all_data.to_csv('consolidated_data.csv')

print("CSV file has been created successfully.")


CSV file has been created successfully.


In [4]:
import pandas as pd
import numpy as np

# Load the consolidated CSV; the first column is parsed as the datetime index
df = pd.read_csv('consolidated_data.csv', index_col=0, parse_dates=True)

# Forward-fill missing values so each row carries the latest known value
# of every column
df_ffill = df.ffill()

# Take the logarithm of the data
df_log = np.log(df_ffill)

# Compute the first difference of the logs (row-to-row log change)
df_diff = df_log.diff()

# A zero value makes np.log return -inf, so the difference can contain
# +/-inf, which dropna() alone would keep and write to the output file.
# Map infinities to NaN first so those rows are dropped with the other
# incomplete rows.
df_clean = df_diff.replace([np.inf, -np.inf], np.nan).dropna()

# Save the processed data to a new CSV file
df_clean.to_csv('pm.csv')

print("Processed CSV file has been created successfully.")


Processed CSV file has been created successfully.
