In [36]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import StandardScaler 

In [42]:
# Load the per-period network properties (one row per period label).
df = pd.read_csv('../data/processed/properties.csv')

# Every column except the 'period' label is treated as a numerical property.
numerical_columns = df.columns.difference(['period'])  # Exclude 'period'

# Put the rows in period order *before* filling gaps: the default linear
# interpolation is positional (it uses the neighbouring rows, not the index),
# so interpolating an unsorted frame would blend non-adjacent periods.
# NOTE(review): assumes zero-padded labels such as '2000M01', for which
# lexicographic order is chronological order -- confirm for new data.
df = df.sort_values(by='period')

# Standardize each numerical column to zero mean / unit variance.
# StandardScaler disregards NaNs when fitting and keeps them in the output,
# so the missing values are still present for the interpolation step below.
scaler = StandardScaler()
df[numerical_columns] = scaler.fit_transform(df[numerical_columns])

# Fill missing values by linear interpolation, restricted to the numerical
# columns: the string 'period' column cannot be interpolated, and passing
# object-dtype columns to DataFrame.interpolate is deprecated in recent pandas.
df[numerical_columns] = df[numerical_columns].interpolate()

df

Unnamed: 0,period,num_nodes,num_edges,density,avg_clustering,assortativity,avg_shortest_path
0,2000M01,-1.947821,-1.448398,-1.414800,-2.189090,-1.259842,-1.018583
1,2000M02,-1.527660,-1.407006,-1.627588,-2.567136,-1.201641,-0.817933
2,2000M03,-1.656940,-1.395717,-1.466600,-1.322957,-1.220972,-0.617283
3,2000M04,-1.656940,-1.399480,-1.482401,-0.880277,-1.406542,-0.521313
4,2000M05,-1.753900,-1.393208,-1.359877,-1.129810,-0.920837,-1.653585
...,...,...,...,...,...,...,...
280,2023M05,0.960982,0.830711,0.638994,0.813064,0.110855,-0.769097
281,2023M06,0.960982,0.767995,0.546720,1.035085,-0.004212,-0.500082
282,2023M07,0.767062,0.663886,0.625349,0.366248,-0.044437,-0.589960
283,2023M08,0.670102,0.612459,0.666572,1.194386,0.029263,-0.557070


### Uncertainty for each property

In [43]:
# Number of preceding periods each observation is compared against.
h = 3  # You can adjust this value based on your needs
if h < 1:
    raise ValueError("h must be a positive integer")

# Select the property columns by name (not by position) so the result does not
# depend on 'period' happening to be the first column of df.
property_columns = [column for column in df.columns if column != 'period']
values = df[property_columns]

# For each lag k = 1..h, the squared difference between the value k rows
# earlier and the current value: (x[i - k] - x[i]) ** 2 for every row i.
squared_errors = [(values.shift(lag) - values) ** 2 for lag in range(1, h + 1)]

# Average over the h lags. Missing terms are skipped (as Series.mean() does):
# sum the available terms and divide by how many were available; a row with
# no available term yields 0 / 0 = NaN.
error_totals = sum(frame.fillna(0) for frame in squared_errors)
error_counts = sum(frame.notna().astype(int) for frame in squared_errors)
mse_values = error_totals / error_counts

# The first h rows do not have h predecessors, so they are dropped; the
# remaining rows keep their 'period' label alongside the per-column errors.
mse_df = pd.concat([df['period'].iloc[h:], mse_values.iloc[h:]], axis=1)

# Display the resulting DataFrame
mse_df


Unnamed: 0,period,num_nodes,num_edges,density,avg_clustering,assortativity,avg_shortest_path
3,2000M04,0.033775,0.000821,0.008633,1.584817,0.032647,0.114824
4,2000M05,0.023329,0.000079,0.032690,0.721826,0.134947,1.018092
5,2000M06,0.003134,0.000057,0.002961,0.124749,0.180809,0.623717
6,2000M07,0.007312,0.000297,0.025329,0.147289,0.084700,0.709033
7,2000M08,0.051881,0.000356,0.082869,1.674135,0.105027,6.289414
...,...,...,...,...,...,...,...
280,2023M05,0.026115,0.012263,0.012198,0.009496,0.008705,0.049104
281,2023M06,0.009053,0.009199,0.021009,0.041304,0.028480,0.161011
282,2023M07,0.025418,0.012965,0.002217,0.280995,0.026691,0.082534
283,2023M08,0.059541,0.024824,0.005608,0.285532,0.004403,0.016428
